{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Lending Club 2016年Q3数据：https://www.lendingclub.com/info/download-data.action\n",
    "\n",
    "参考：http://kldavenport.com/lending-club-data-analysis-revisted-with-python/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"./LoanStats_2016Q3.csv\",skiprows=1,low_memory=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 99124 entries, 0 to 99123\n",
      "Columns: 122 entries, id to sec_app_mths_since_last_major_derog\n",
      "dtypes: float64(97), object(25)\n",
      "memory usage: 92.3+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>member_id</th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>sub_grade</th>\n",
       "      <th>...</th>\n",
       "      <th>sec_app_earliest_cr_line</th>\n",
       "      <th>sec_app_inq_last_6mths</th>\n",
       "      <th>sec_app_mort_acc</th>\n",
       "      <th>sec_app_open_acc</th>\n",
       "      <th>sec_app_revol_util</th>\n",
       "      <th>sec_app_open_il_6m</th>\n",
       "      <th>sec_app_num_rev_accts</th>\n",
       "      <th>sec_app_chargeoff_within_12_mths</th>\n",
       "      <th>sec_app_collections_12_mths_ex_med</th>\n",
       "      <th>sec_app_mths_since_last_major_derog</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>13.99%</td>\n",
       "      <td>512.60</td>\n",
       "      <td>C</td>\n",
       "      <td>C3</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>8.99%</td>\n",
       "      <td>82.67</td>\n",
       "      <td>B</td>\n",
       "      <td>B1</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32200.0</td>\n",
       "      <td>32200.0</td>\n",
       "      <td>32200.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>21.49%</td>\n",
       "      <td>880.02</td>\n",
       "      <td>D</td>\n",
       "      <td>D5</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 122 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    id  member_id  loan_amnt  funded_amnt  funded_amnt_inv        term  \\\n",
       "0  NaN        NaN    15000.0      15000.0          15000.0   36 months   \n",
       "1  NaN        NaN     2600.0       2600.0           2600.0   36 months   \n",
       "2  NaN        NaN    32200.0      32200.0          32200.0   60 months   \n",
       "\n",
       "  int_rate  installment grade sub_grade                 ...                  \\\n",
       "0   13.99%       512.60     C        C3                 ...                   \n",
       "1    8.99%        82.67     B        B1                 ...                   \n",
       "2   21.49%       880.02     D        D5                 ...                   \n",
       "\n",
       "  sec_app_earliest_cr_line sec_app_inq_last_6mths sec_app_mort_acc  \\\n",
       "0                      NaN                    NaN              NaN   \n",
       "1                      NaN                    NaN              NaN   \n",
       "2                      NaN                    NaN              NaN   \n",
       "\n",
       "   sec_app_open_acc sec_app_revol_util sec_app_open_il_6m  \\\n",
       "0               NaN                NaN                NaN   \n",
       "1               NaN                NaN                NaN   \n",
       "2               NaN                NaN                NaN   \n",
       "\n",
       "  sec_app_num_rev_accts sec_app_chargeoff_within_12_mths  \\\n",
       "0                   NaN                              NaN   \n",
       "1                   NaN                              NaN   \n",
       "2                   NaN                              NaN   \n",
       "\n",
       "   sec_app_collections_12_mths_ex_med sec_app_mths_since_last_major_derog  \n",
       "0                                 NaN                                 NaN  \n",
       "1                                 NaN                                 NaN  \n",
       "2                                 NaN                                 NaN  \n",
       "\n",
       "[3 rows x 122 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Keep what we need"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>member_id</th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>13.99%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>8.99%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32200.0</td>\n",
       "      <td>32200.0</td>\n",
       "      <td>32200.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>21.49%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>11.49%</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6000.0</td>\n",
       "      <td>6000.0</td>\n",
       "      <td>6000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>13.49%</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    id  member_id  loan_amnt  funded_amnt  funded_amnt_inv        term  \\\n",
       "0  NaN        NaN    15000.0      15000.0          15000.0   36 months   \n",
       "1  NaN        NaN     2600.0       2600.0           2600.0   36 months   \n",
       "2  NaN        NaN    32200.0      32200.0          32200.0   60 months   \n",
       "3  NaN        NaN    10000.0      10000.0          10000.0   36 months   \n",
       "4  NaN        NaN     6000.0       6000.0           6000.0   36 months   \n",
       "\n",
       "  int_rate  \n",
       "0   13.99%  \n",
       "1    8.99%  \n",
       "2   21.49%  \n",
       "3   11.49%  \n",
       "4   13.49%  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# .ix[row slice, column slice] \n",
    "df.ix[:4,:7]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.drop('id',1,inplace=True)\n",
    "df.drop('member_id',1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df.int_rate = pd.Series(df.int_rate).str.replace('%', '').astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>15000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>13.99</td>\n",
       "      <td>512.60</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2600.0</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>8.99</td>\n",
       "      <td>82.67</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>32200.0</td>\n",
       "      <td>32200.0</td>\n",
       "      <td>32200.0</td>\n",
       "      <td>60 months</td>\n",
       "      <td>21.49</td>\n",
       "      <td>880.02</td>\n",
       "      <td>D</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>11.49</td>\n",
       "      <td>329.72</td>\n",
       "      <td>B</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6000.0</td>\n",
       "      <td>6000.0</td>\n",
       "      <td>6000.0</td>\n",
       "      <td>36 months</td>\n",
       "      <td>13.49</td>\n",
       "      <td>203.59</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   loan_amnt  funded_amnt  funded_amnt_inv        term  int_rate  installment  \\\n",
       "0    15000.0      15000.0          15000.0   36 months     13.99       512.60   \n",
       "1     2600.0       2600.0           2600.0   36 months      8.99        82.67   \n",
       "2    32200.0      32200.0          32200.0   60 months     21.49       880.02   \n",
       "3    10000.0      10000.0          10000.0   36 months     11.49       329.72   \n",
       "4     6000.0       6000.0           6000.0   36 months     13.49       203.59   \n",
       "\n",
       "  grade  \n",
       "0     C  \n",
       "1     B  \n",
       "2     D  \n",
       "3     B  \n",
       "4     C  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.ix[:4,:7]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Loan Amount Requested Verus the Funded Amount"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False    99120\n",
      "True         4\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print (df.loan_amnt != df.funded_amnt).value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>term</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>grade</th>\n",
       "      <th>sub_grade</th>\n",
       "      <th>emp_title</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>...</th>\n",
       "      <th>sec_app_earliest_cr_line</th>\n",
       "      <th>sec_app_inq_last_6mths</th>\n",
       "      <th>sec_app_mort_acc</th>\n",
       "      <th>sec_app_open_acc</th>\n",
       "      <th>sec_app_revol_util</th>\n",
       "      <th>sec_app_open_il_6m</th>\n",
       "      <th>sec_app_num_rev_accts</th>\n",
       "      <th>sec_app_chargeoff_within_12_mths</th>\n",
       "      <th>sec_app_collections_12_mths_ex_med</th>\n",
       "      <th>sec_app_mths_since_last_major_derog</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>99120</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99121</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99122</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99123</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4 rows × 120 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       loan_amnt  funded_amnt  funded_amnt_inv term  int_rate  installment  \\\n",
       "99120        NaN          NaN              NaN  NaN       NaN          NaN   \n",
       "99121        NaN          NaN              NaN  NaN       NaN          NaN   \n",
       "99122        NaN          NaN              NaN  NaN       NaN          NaN   \n",
       "99123        NaN          NaN              NaN  NaN       NaN          NaN   \n",
       "\n",
       "      grade sub_grade emp_title emp_length  \\\n",
       "99120   NaN       NaN       NaN        NaN   \n",
       "99121   NaN       NaN       NaN        NaN   \n",
       "99122   NaN       NaN       NaN        NaN   \n",
       "99123   NaN       NaN       NaN        NaN   \n",
       "\n",
       "                      ...                 sec_app_earliest_cr_line  \\\n",
       "99120                 ...                                      NaN   \n",
       "99121                 ...                                      NaN   \n",
       "99122                 ...                                      NaN   \n",
       "99123                 ...                                      NaN   \n",
       "\n",
       "       sec_app_inq_last_6mths sec_app_mort_acc sec_app_open_acc  \\\n",
       "99120                     NaN              NaN              NaN   \n",
       "99121                     NaN              NaN              NaN   \n",
       "99122                     NaN              NaN              NaN   \n",
       "99123                     NaN              NaN              NaN   \n",
       "\n",
       "      sec_app_revol_util sec_app_open_il_6m  sec_app_num_rev_accts  \\\n",
       "99120                NaN                NaN                    NaN   \n",
       "99121                NaN                NaN                    NaN   \n",
       "99122                NaN                NaN                    NaN   \n",
       "99123                NaN                NaN                    NaN   \n",
       "\n",
       "      sec_app_chargeoff_within_12_mths sec_app_collections_12_mths_ex_med  \\\n",
       "99120                              NaN                                NaN   \n",
       "99121                              NaN                                NaN   \n",
       "99122                              NaN                                NaN   \n",
       "99123                              NaN                                NaN   \n",
       "\n",
       "      sec_app_mths_since_last_major_derog  \n",
       "99120                                 NaN  \n",
       "99121                                 NaN  \n",
       "99122                                 NaN  \n",
       "99123                                 NaN  \n",
       "\n",
       "[4 rows x 120 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.query('loan_amnt != funded_amnt').head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df.dropna(axis=0, how='all',inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 99120 entries, 0 to 99119\n",
      "Columns: 120 entries, loan_amnt to sec_app_mths_since_last_major_derog\n",
      "dtypes: float64(97), object(23)\n",
      "memory usage: 91.5+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df.dropna(axis=1, how='all',inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 99120 entries, 0 to 99119\n",
      "Columns: 108 entries, loan_amnt to total_il_high_credit_limit\n",
      "dtypes: float64(85), object(23)\n",
      "memory usage: 82.4+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>emp_title</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>annual_inc</th>\n",
       "      <th>verification_status</th>\n",
       "      <th>issue_d</th>\n",
       "      <th>loan_status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Fiscal Director</td>\n",
       "      <td>2 years</td>\n",
       "      <td>RENT</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Current</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Loaner Coordinator</td>\n",
       "      <td>3 years</td>\n",
       "      <td>RENT</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Fully Paid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>warehouse/supervisor</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>65000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Fully Paid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Teacher</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>OWN</td>\n",
       "      <td>55900.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Current</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>SERVICE MGR</td>\n",
       "      <td>5 years</td>\n",
       "      <td>RENT</td>\n",
       "      <td>33000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Current</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>General Manager</td>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>109000.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Current</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              emp_title emp_length home_ownership  annual_inc  \\\n",
       "0       Fiscal Director    2 years           RENT     55000.0   \n",
       "1    Loaner Coordinator    3 years           RENT     35000.0   \n",
       "2  warehouse/supervisor  10+ years       MORTGAGE     65000.0   \n",
       "3               Teacher  10+ years            OWN     55900.0   \n",
       "4           SERVICE MGR    5 years           RENT     33000.0   \n",
       "5       General Manager  10+ years       MORTGAGE    109000.0   \n",
       "\n",
       "  verification_status issue_d loan_status  \n",
       "0        Not Verified  Sep-16     Current  \n",
       "1     Source Verified  Sep-16  Fully Paid  \n",
       "2        Not Verified  Sep-16  Fully Paid  \n",
       "3        Not Verified  Sep-16     Current  \n",
       "4        Not Verified  Sep-16     Current  \n",
       "5     Source Verified  Sep-16     Current  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.ix[:5,8:15]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### emp_title: employment title"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Teacher       1931\n",
      "Manager       1701\n",
      "Owner          990\n",
      "Supervisor     785\n",
      "Driver         756\n",
      "Name: emp_title, dtype: int64\n",
      "Agent Services Representative           1\n",
      "Operator Bridge Tunnel                  1\n",
      "Reg Medical Assistant/Referral Spec.    1\n",
      "Home Health Care                        1\n",
      "rounds cook                             1\n",
      "Name: emp_title, dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(37421,)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print df.emp_title.value_counts().head()\n",
    "print df.emp_title.value_counts().tail()\n",
    "df.emp_title.unique().shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.drop(['emp_title'],1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>emp_length</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>annual_inc</th>\n",
       "      <th>verification_status</th>\n",
       "      <th>issue_d</th>\n",
       "      <th>loan_status</th>\n",
       "      <th>pymnt_plan</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2 years</td>\n",
       "      <td>RENT</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Current</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3 years</td>\n",
       "      <td>RENT</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>65000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Fully Paid</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10+ years</td>\n",
       "      <td>OWN</td>\n",
       "      <td>55900.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Current</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5 years</td>\n",
       "      <td>RENT</td>\n",
       "      <td>33000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Current</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>10+ years</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>109000.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Sep-16</td>\n",
       "      <td>Current</td>\n",
       "      <td>n</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  emp_length home_ownership  annual_inc verification_status issue_d  \\\n",
       "0    2 years           RENT     55000.0        Not Verified  Sep-16   \n",
       "1    3 years           RENT     35000.0     Source Verified  Sep-16   \n",
       "2  10+ years       MORTGAGE     65000.0        Not Verified  Sep-16   \n",
       "3  10+ years            OWN     55900.0        Not Verified  Sep-16   \n",
       "4    5 years           RENT     33000.0        Not Verified  Sep-16   \n",
       "5  10+ years       MORTGAGE    109000.0     Source Verified  Sep-16   \n",
       "\n",
       "  loan_status pymnt_plan  \n",
       "0     Current          n  \n",
       "1  Fully Paid          n  \n",
       "2  Fully Paid          n  \n",
       "3     Current          n  \n",
       "4     Current          n  \n",
       "5     Current          n  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview rows labelled 0-5 (inclusive) for columns at positions 8-14.\n",
    "# `.ix` was removed in pandas 1.0; `.loc` + `.iloc` reproduces its label/position mix.\n",
    "df.loc[:5].iloc[:, 8:15]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### emp_length: employment length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10+ years    34219\n",
       "2 years       9066\n",
       "3 years       7925\n",
       "< 1 year      7104\n",
       "1 year        6991\n",
       "5 years       6170\n",
       "4 years       6022\n",
       "n/a           5922\n",
       "6 years       4406\n",
       "8 years       4168\n",
       "9 years       3922\n",
       "7 years       3205\n",
       "Name: emp_length, dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Frequency of each raw employment-length label.\n",
    "df['emp_length'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Convert emp_length to integer years: missing ('n/a') -> 0, otherwise keep only the digits\n",
    "# (so '10+ years' -> 10; note that '< 1 year' and '1 year' both become 1).\n",
    "df.replace('n/a', np.nan, inplace=True)\n",
    "# Assign the result back instead of calling inplace methods on a column selection,\n",
    "# which does not reliably update `df` (chained assignment).\n",
    "df['emp_length'] = (\n",
    "    df['emp_length']\n",
    "    .fillna(0)\n",
    "    .replace(to_replace='[^0-9]+', value='', regex=True)\n",
    "    .astype(int)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10    34219\n",
       "1     14095\n",
       "2      9066\n",
       "3      7925\n",
       "5      6170\n",
       "4      6022\n",
       "0      5922\n",
       "6      4406\n",
       "8      4168\n",
       "9      3922\n",
       "7      3205\n",
       "Name: emp_length, dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distribution of emp_length after conversion to integer years.\n",
    "df['emp_length'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### verification_status: \"Indicates if income was verified by LC, not verified, or if the income source was verified\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Source Verified    40781\n",
       "Verified           31356\n",
       "Not Verified       26983\n",
       "Name: verification_status, dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Frequency of each income-verification category.\n",
    "df['verification_status'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Target: Loan Statuses"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 99120 entries, 0 to 99119\n",
      "Columns: 107 entries, loan_amnt to total_il_high_credit_limit\n",
      "dtypes: float64(85), int64(1), object(21)\n",
      "memory usage: 81.7+ MB\n"
     ]
    }
   ],
   "source": [
    "# Summarise the frame (row count, column count, dtypes, memory) before inspecting the target.\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'loan_amnt', u'funded_amnt', u'funded_amnt_inv', u'term', u'int_rate',\n",
       "       u'installment', u'grade', u'sub_grade', u'emp_length',\n",
       "       u'home_ownership',\n",
       "       ...\n",
       "       u'num_tl_90g_dpd_24m', u'num_tl_op_past_12m', u'pct_tl_nvr_dlq',\n",
       "       u'percent_bc_gt_75', u'pub_rec_bankruptcies', u'tax_liens',\n",
       "       u'tot_hi_cred_lim', u'total_bal_ex_mort', u'total_bc_limit',\n",
       "       u'total_il_high_credit_limit'],\n",
       "      dtype='object', length=107)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the column labels currently present in the frame.\n",
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Current', 'Fully Paid', 'Late (31-120 days)', 'Charged Off',\n",
       "       'Late (16-30 days)', 'In Grace Period', 'Default'], dtype=object)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct values of the target column, in order of first appearance.\n",
    "df['loan_status'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Column term has 2 unique instances\n",
      "Column grade has 7 unique instances\n",
      "Column sub_grade has 35 unique instances\n",
      "Column home_ownership has 4 unique instances\n",
      "Column verification_status has 3 unique instances\n",
      "Column issue_d has 3 unique instances\n",
      "Column loan_status has 7 unique instances\n",
      "Column pymnt_plan has 2 unique instances\n",
      "Column desc has 6 unique instances\n",
      "Column purpose has 13 unique instances\n",
      "Column title has 13 unique instances\n",
      "Column zip_code has 873 unique instances\n",
      "Column addr_state has 50 unique instances\n",
      "Column earliest_cr_line has 614 unique instances\n",
      "Column revol_util has 1087 unique instances\n",
      "Column initial_list_status has 2 unique instances\n",
      "Column last_pymnt_d has 13 unique instances\n",
      "Column next_pymnt_d has 4 unique instances\n",
      "Column last_credit_pull_d has 14 unique instances\n",
      "Column application_type has 3 unique instances\n",
      "Column verification_status_joint has 2 unique instances\n"
     ]
    }
   ],
   "source": [
    "# Print how many distinct values each object-dtype column holds (NaN counts as one value).\n",
    "for col in df.select_dtypes(include=['object']).columns:\n",
    "    n_distinct = df[col].nunique(dropna=False)\n",
    "    print(\"Column {} has {} unique instances\".format(col, n_distinct))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>unique</th>\n",
       "      <th>top</th>\n",
       "      <th>freq</th>\n",
       "      <th>missing_pct</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>term</th>\n",
       "      <td>99120</td>\n",
       "      <td>2</td>\n",
       "      <td>36 months</td>\n",
       "      <td>73898</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>grade</th>\n",
       "      <td>99120</td>\n",
       "      <td>7</td>\n",
       "      <td>C</td>\n",
       "      <td>32846</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sub_grade</th>\n",
       "      <td>99120</td>\n",
       "      <td>35</td>\n",
       "      <td>B5</td>\n",
       "      <td>8322</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>home_ownership</th>\n",
       "      <td>99120</td>\n",
       "      <td>4</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>46761</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>verification_status</th>\n",
       "      <td>99120</td>\n",
       "      <td>3</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>40781</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>issue_d</th>\n",
       "      <td>99120</td>\n",
       "      <td>3</td>\n",
       "      <td>Aug-16</td>\n",
       "      <td>36280</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>loan_status</th>\n",
       "      <td>99120</td>\n",
       "      <td>7</td>\n",
       "      <td>Current</td>\n",
       "      <td>79445</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pymnt_plan</th>\n",
       "      <td>99120</td>\n",
       "      <td>2</td>\n",
       "      <td>n</td>\n",
       "      <td>99074</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>desc</th>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td></td>\n",
       "      <td>2</td>\n",
       "      <td>0.999939</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>purpose</th>\n",
       "      <td>99120</td>\n",
       "      <td>13</td>\n",
       "      <td>debt_consolidation</td>\n",
       "      <td>57682</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>title</th>\n",
       "      <td>93693</td>\n",
       "      <td>12</td>\n",
       "      <td>Debt consolidation</td>\n",
       "      <td>53999</td>\n",
       "      <td>0.054752</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zip_code</th>\n",
       "      <td>99120</td>\n",
       "      <td>873</td>\n",
       "      <td>112xx</td>\n",
       "      <td>1125</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>addr_state</th>\n",
       "      <td>99120</td>\n",
       "      <td>50</td>\n",
       "      <td>CA</td>\n",
       "      <td>13352</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>earliest_cr_line</th>\n",
       "      <td>99120</td>\n",
       "      <td>614</td>\n",
       "      <td>Aug-03</td>\n",
       "      <td>796</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>revol_util</th>\n",
       "      <td>99060</td>\n",
       "      <td>1086</td>\n",
       "      <td>0%</td>\n",
       "      <td>440</td>\n",
       "      <td>0.000605</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>initial_list_status</th>\n",
       "      <td>99120</td>\n",
       "      <td>2</td>\n",
       "      <td>w</td>\n",
       "      <td>71869</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>last_pymnt_d</th>\n",
       "      <td>98991</td>\n",
       "      <td>12</td>\n",
       "      <td>Jun-17</td>\n",
       "      <td>81082</td>\n",
       "      <td>0.001301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>next_pymnt_d</th>\n",
       "      <td>83552</td>\n",
       "      <td>3</td>\n",
       "      <td>Jul-17</td>\n",
       "      <td>83527</td>\n",
       "      <td>0.157062</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>last_credit_pull_d</th>\n",
       "      <td>99115</td>\n",
       "      <td>13</td>\n",
       "      <td>Jun-17</td>\n",
       "      <td>89280</td>\n",
       "      <td>0.000050</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>application_type</th>\n",
       "      <td>99120</td>\n",
       "      <td>3</td>\n",
       "      <td>INDIVIDUAL</td>\n",
       "      <td>98565</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>verification_status_joint</th>\n",
       "      <td>517</td>\n",
       "      <td>1</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>517</td>\n",
       "      <td>0.994784</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           count unique                 top   freq  \\\n",
       "term                       99120      2           36 months  73898   \n",
       "grade                      99120      7                   C  32846   \n",
       "sub_grade                  99120     35                  B5   8322   \n",
       "home_ownership             99120      4            MORTGAGE  46761   \n",
       "verification_status        99120      3     Source Verified  40781   \n",
       "issue_d                    99120      3              Aug-16  36280   \n",
       "loan_status                99120      7             Current  79445   \n",
       "pymnt_plan                 99120      2                   n  99074   \n",
       "desc                           6      5                          2   \n",
       "purpose                    99120     13  debt_consolidation  57682   \n",
       "title                      93693     12  Debt consolidation  53999   \n",
       "zip_code                   99120    873               112xx   1125   \n",
       "addr_state                 99120     50                  CA  13352   \n",
       "earliest_cr_line           99120    614              Aug-03    796   \n",
       "revol_util                 99060   1086                  0%    440   \n",
       "initial_list_status        99120      2                   w  71869   \n",
       "last_pymnt_d               98991     12              Jun-17  81082   \n",
       "next_pymnt_d               83552      3              Jul-17  83527   \n",
       "last_credit_pull_d         99115     13              Jun-17  89280   \n",
       "application_type           99120      3          INDIVIDUAL  98565   \n",
       "verification_status_joint    517      1        Not Verified    517   \n",
       "\n",
       "                           missing_pct  \n",
       "term                          0.000000  \n",
       "grade                         0.000000  \n",
       "sub_grade                     0.000000  \n",
       "home_ownership                0.000000  \n",
       "verification_status           0.000000  \n",
       "issue_d                       0.000000  \n",
       "loan_status                   0.000000  \n",
       "pymnt_plan                    0.000000  \n",
       "desc                          0.999939  \n",
       "purpose                       0.000000  \n",
       "title                         0.054752  \n",
       "zip_code                      0.000000  \n",
       "addr_state                    0.000000  \n",
       "earliest_cr_line              0.000000  \n",
       "revol_util                    0.000605  \n",
       "initial_list_status           0.000000  \n",
       "last_pymnt_d                  0.001301  \n",
       "next_pymnt_d                  0.157062  \n",
       "last_credit_pull_d            0.000050  \n",
       "application_type              0.000000  \n",
       "verification_status_joint     0.994784  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summarise object-dtype columns (count / unique / top / freq) and add each column's\n",
    "# fraction of missing values as `missing_pct`.\n",
    "(\n",
    "    df.select_dtypes(include=['O'])\n",
    "    .describe()\n",
    "    .T\n",
    "    .assign(missing_pct=df.isnull().mean())\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Remove the percent sign so revol_util can be stored as a float.\n",
    "df['revol_util'] = df['revol_util'].str.replace('%', '').astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# missing_pct\n",
    "df.drop('desc',1,inplace=True)\n",
    "df.drop('verification_status_joint',1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Location, date, free-text and categorical columns removed from the working frame.\n",
    "# `axis` is passed by keyword: the positional form is rejected by pandas 2.x.\n",
    "df.drop(['zip_code', 'addr_state', 'earliest_cr_line', 'revol_util',\n",
    "         'purpose', 'title', 'term', 'issue_d'], axis=1, inplace=True)\n",
    "# Post-loan (servicing) fields, plus the grade / sub_grade labels.\n",
    "df.drop(['out_prncp', 'out_prncp_inv', 'total_pymnt', 'total_pymnt_inv',\n",
    "         'total_rec_prncp', 'total_rec_int', 'total_rec_late_fee',\n",
    "         'recoveries', 'collection_recovery_fee',\n",
    "         'last_pymnt_d', 'last_pymnt_amnt', 'next_pymnt_d',\n",
    "         'last_credit_pull_d', 'grade', 'sub_grade'], axis=1, inplace=True)\n",
    "df.drop(['policy_code'], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 99120 entries, 0 to 99119\n",
      "Data columns (total 81 columns):\n",
      "loan_amnt                         99120 non-null float64\n",
      "funded_amnt                       99120 non-null float64\n",
      "funded_amnt_inv                   99120 non-null float64\n",
      "int_rate                          99120 non-null float64\n",
      "installment                       99120 non-null float64\n",
      "emp_length                        99120 non-null int64\n",
      "home_ownership                    99120 non-null object\n",
      "annual_inc                        99120 non-null float64\n",
      "verification_status               99120 non-null object\n",
      "loan_status                       99120 non-null object\n",
      "pymnt_plan                        99120 non-null object\n",
      "dti                               99120 non-null float64\n",
      "delinq_2yrs                       99120 non-null float64\n",
      "inq_last_6mths                    99120 non-null float64\n",
      "mths_since_last_delinq            53366 non-null float64\n",
      "mths_since_last_record            19792 non-null float64\n",
      "open_acc                          99120 non-null float64\n",
      "pub_rec                           99120 non-null float64\n",
      "revol_bal                         99120 non-null float64\n",
      "total_acc                         99120 non-null float64\n",
      "initial_list_status               99120 non-null object\n",
      "collections_12_mths_ex_med        99120 non-null float64\n",
      "mths_since_last_major_derog       29372 non-null float64\n",
      "application_type                  99120 non-null object\n",
      "annual_inc_joint                  517 non-null float64\n",
      "dti_joint                         517 non-null float64\n",
      "acc_now_delinq                    99120 non-null float64\n",
      "tot_coll_amt                      99120 non-null float64\n",
      "tot_cur_bal                       99120 non-null float64\n",
      "open_acc_6m                       99120 non-null float64\n",
      "open_il_6m                        99120 non-null float64\n",
      "open_il_12m                       99120 non-null float64\n",
      "open_il_24m                       99120 non-null float64\n",
      "mths_since_rcnt_il                96469 non-null float64\n",
      "total_bal_il                      99120 non-null float64\n",
      "il_util                           85480 non-null float64\n",
      "open_rv_12m                       99120 non-null float64\n",
      "open_rv_24m                       99120 non-null float64\n",
      "max_bal_bc                        99120 non-null float64\n",
      "all_util                          99114 non-null float64\n",
      "total_rev_hi_lim                  99120 non-null float64\n",
      "inq_fi                            99120 non-null float64\n",
      "total_cu_tl                       99120 non-null float64\n",
      "inq_last_12m                      99120 non-null float64\n",
      "acc_open_past_24mths              99120 non-null float64\n",
      "avg_cur_bal                       99120 non-null float64\n",
      "bc_open_to_buy                    98010 non-null float64\n",
      "bc_util                           97971 non-null float64\n",
      "chargeoff_within_12_mths          99120 non-null float64\n",
      "delinq_amnt                       99120 non-null float64\n",
      "mo_sin_old_il_acct                96469 non-null float64\n",
      "mo_sin_old_rev_tl_op              99120 non-null float64\n",
      "mo_sin_rcnt_rev_tl_op             99120 non-null float64\n",
      "mo_sin_rcnt_tl                    99120 non-null float64\n",
      "mort_acc                          99120 non-null float64\n",
      "mths_since_recent_bc              98067 non-null float64\n",
      "mths_since_recent_bc_dlq          26018 non-null float64\n",
      "mths_since_recent_inq             89254 non-null float64\n",
      "mths_since_recent_revol_delinq    36606 non-null float64\n",
      "num_accts_ever_120_pd             99120 non-null float64\n",
      "num_actv_bc_tl                    99120 non-null float64\n",
      "num_actv_rev_tl                   99120 non-null float64\n",
      "num_bc_sats                       99120 non-null float64\n",
      "num_bc_tl                         99120 non-null float64\n",
      "num_il_tl                         99120 non-null float64\n",
      "num_op_rev_tl                     99120 non-null float64\n",
      "num_rev_accts                     99120 non-null float64\n",
      "num_rev_tl_bal_gt_0               99120 non-null float64\n",
      "num_sats                          99120 non-null float64\n",
      "num_tl_120dpd_2m                  95661 non-null float64\n",
      "num_tl_30dpd                      99120 non-null float64\n",
      "num_tl_90g_dpd_24m                99120 non-null float64\n",
      "num_tl_op_past_12m                99120 non-null float64\n",
      "pct_tl_nvr_dlq                    99120 non-null float64\n",
      "percent_bc_gt_75                  98006 non-null float64\n",
      "pub_rec_bankruptcies              99120 non-null float64\n",
      "tax_liens                         99120 non-null float64\n",
      "tot_hi_cred_lim                   99120 non-null float64\n",
      "total_bal_ex_mort                 99120 non-null float64\n",
      "total_bc_limit                    99120 non-null float64\n",
      "total_il_high_credit_limit        99120 non-null float64\n",
      "dtypes: float64(74), int64(1), object(6)\n",
      "memory usage: 62.0+ MB\n"
     ]
    }
   ],
   "source": [
    "# Re-inspect the frame after the column drops: remaining columns, non-null counts and dtypes.\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loan_amnt</th>\n",
       "      <th>funded_amnt</th>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <th>int_rate</th>\n",
       "      <th>installment</th>\n",
       "      <th>emp_length</th>\n",
       "      <th>home_ownership</th>\n",
       "      <th>annual_inc</th>\n",
       "      <th>verification_status</th>\n",
       "      <th>loan_status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>15000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>15000.0</td>\n",
       "      <td>13.99</td>\n",
       "      <td>512.60</td>\n",
       "      <td>2</td>\n",
       "      <td>RENT</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Current</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2600.0</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>8.99</td>\n",
       "      <td>82.67</td>\n",
       "      <td>3</td>\n",
       "      <td>RENT</td>\n",
       "      <td>35000.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>32200.0</td>\n",
       "      <td>32200.0</td>\n",
       "      <td>32200.0</td>\n",
       "      <td>21.49</td>\n",
       "      <td>880.02</td>\n",
       "      <td>10</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>65000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Fully Paid</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10000.0</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>11.49</td>\n",
       "      <td>329.72</td>\n",
       "      <td>10</td>\n",
       "      <td>OWN</td>\n",
       "      <td>55900.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Current</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6000.0</td>\n",
       "      <td>6000.0</td>\n",
       "      <td>6000.0</td>\n",
       "      <td>13.49</td>\n",
       "      <td>203.59</td>\n",
       "      <td>5</td>\n",
       "      <td>RENT</td>\n",
       "      <td>33000.0</td>\n",
       "      <td>Not Verified</td>\n",
       "      <td>Current</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>30000.0</td>\n",
       "      <td>30000.0</td>\n",
       "      <td>30000.0</td>\n",
       "      <td>13.99</td>\n",
       "      <td>697.90</td>\n",
       "      <td>10</td>\n",
       "      <td>MORTGAGE</td>\n",
       "      <td>109000.0</td>\n",
       "      <td>Source Verified</td>\n",
       "      <td>Current</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   loan_amnt  funded_amnt  funded_amnt_inv  int_rate  installment  emp_length  \\\n",
       "0    15000.0      15000.0          15000.0     13.99       512.60           2   \n",
       "1     2600.0       2600.0           2600.0      8.99        82.67           3   \n",
       "2    32200.0      32200.0          32200.0     21.49       880.02          10   \n",
       "3    10000.0      10000.0          10000.0     11.49       329.72          10   \n",
       "4     6000.0       6000.0           6000.0     13.49       203.59           5   \n",
       "5    30000.0      30000.0          30000.0     13.99       697.90          10   \n",
       "\n",
       "  home_ownership  annual_inc verification_status loan_status  \n",
       "0           RENT     55000.0        Not Verified     Current  \n",
       "1           RENT     35000.0     Source Verified  Fully Paid  \n",
       "2       MORTGAGE     65000.0        Not Verified  Fully Paid  \n",
       "3            OWN     55900.0        Not Verified     Current  \n",
       "4           RENT     33000.0        Not Verified     Current  \n",
       "5       MORTGAGE    109000.0     Source Verified     Current  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview rows labelled 0-5 (inclusive) for the first 10 columns.\n",
    "# `.ix` was removed in pandas 1.0; `.loc` + `.iloc` reproduces its label/position mix.\n",
    "df.loc[:5].iloc[:, :10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pymnt_plan</th>\n",
       "      <th>dti</th>\n",
       "      <th>delinq_2yrs</th>\n",
       "      <th>inq_last_6mths</th>\n",
       "      <th>mths_since_last_delinq</th>\n",
       "      <th>mths_since_last_record</th>\n",
       "      <th>open_acc</th>\n",
       "      <th>pub_rec</th>\n",
       "      <th>revol_bal</th>\n",
       "      <th>total_acc</th>\n",
       "      <th>initial_list_status</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>n</td>\n",
       "      <td>23.78</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21345.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>f</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>n</td>\n",
       "      <td>6.73</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>720.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>n</td>\n",
       "      <td>11.71</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>87.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>11987.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>n</td>\n",
       "      <td>26.21</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>17209.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>n</td>\n",
       "      <td>19.05</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4576.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>f</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>n</td>\n",
       "      <td>16.24</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11337.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>w</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  pymnt_plan    dti  delinq_2yrs  inq_last_6mths  mths_since_last_delinq  \\\n",
       "0          n  23.78          1.0             0.0                     7.0   \n",
       "1          n   6.73          0.0             0.0                     NaN   \n",
       "2          n  11.71          0.0             1.0                     NaN   \n",
       "3          n  26.21          0.0             2.0                     NaN   \n",
       "4          n  19.05          0.0             0.0                     NaN   \n",
       "5          n  16.24          0.0             0.0                     NaN   \n",
       "\n",
       "   mths_since_last_record  open_acc  pub_rec  revol_bal  total_acc  \\\n",
       "0                     NaN      22.0      0.0    21345.0       43.0   \n",
       "1                     NaN      14.0      0.0      720.0       24.0   \n",
       "2                    87.0      17.0      1.0    11987.0       34.0   \n",
       "3                     NaN      15.0      0.0    17209.0       62.0   \n",
       "4                     NaN       3.0      0.0     4576.0       11.0   \n",
       "5                     NaN      17.0      0.0    11337.0       39.0   \n",
       "\n",
       "  initial_list_status  \n",
       "0                   f  \n",
       "1                   w  \n",
       "2                   w  \n",
       "3                   w  \n",
       "4                   f  \n",
       "5                   w  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.ix[:5,10:21]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index([u'loan_amnt', u'funded_amnt', u'funded_amnt_inv', u'int_rate',\n",
      "       u'installment', u'emp_length', u'home_ownership', u'annual_inc',\n",
      "       u'verification_status', u'loan_status', u'pymnt_plan', u'dti',\n",
      "       u'delinq_2yrs', u'inq_last_6mths', u'mths_since_last_delinq',\n",
      "       u'mths_since_last_record', u'open_acc', u'pub_rec', u'revol_bal',\n",
      "       u'total_acc', u'initial_list_status', u'collections_12_mths_ex_med',\n",
      "       u'mths_since_last_major_derog', u'application_type',\n",
      "       u'annual_inc_joint', u'dti_joint', u'acc_now_delinq', u'tot_coll_amt',\n",
      "       u'tot_cur_bal', u'open_acc_6m', u'open_il_6m', u'open_il_12m',\n",
      "       u'open_il_24m', u'mths_since_rcnt_il', u'total_bal_il', u'il_util',\n",
      "       u'open_rv_12m', u'open_rv_24m', u'max_bal_bc', u'all_util',\n",
      "       u'total_rev_hi_lim', u'inq_fi', u'total_cu_tl', u'inq_last_12m',\n",
      "       u'acc_open_past_24mths', u'avg_cur_bal', u'bc_open_to_buy', u'bc_util',\n",
      "       u'chargeoff_within_12_mths', u'delinq_amnt', u'mo_sin_old_il_acct',\n",
      "       u'mo_sin_old_rev_tl_op', u'mo_sin_rcnt_rev_tl_op', u'mo_sin_rcnt_tl',\n",
      "       u'mort_acc', u'mths_since_recent_bc', u'mths_since_recent_bc_dlq',\n",
      "       u'mths_since_recent_inq', u'mths_since_recent_revol_delinq',\n",
      "       u'num_accts_ever_120_pd', u'num_actv_bc_tl', u'num_actv_rev_tl',\n",
      "       u'num_bc_sats', u'num_bc_tl', u'num_il_tl', u'num_op_rev_tl',\n",
      "       u'num_rev_accts', u'num_rev_tl_bal_gt_0', u'num_sats',\n",
      "       u'num_tl_120dpd_2m', u'num_tl_30dpd', u'num_tl_90g_dpd_24m',\n",
      "       u'num_tl_op_past_12m', u'pct_tl_nvr_dlq', u'percent_bc_gt_75',\n",
      "       u'pub_rec_bankruptcies', u'tax_liens', u'tot_hi_cred_lim',\n",
      "       u'total_bal_ex_mort', u'total_bc_limit', u'total_il_high_credit_limit'],\n",
      "      dtype='object')\n",
      "[[15000.0 15000.0 15000.0 13.99 512.6 2 'RENT' 55000.0 'Not Verified'\n",
      "  'Current' 'n' 23.78 1.0 0.0 7.0 nan 22.0 0.0 21345.0 43.0 'f' 0.0 nan\n",
      "  'INDIVIDUAL' nan nan 0.0 0.0 140492.0 3.0 10.0 2.0 3.0 11.0 119147.0\n",
      "  101.0 3.0 4.0 14612.0 83.0 39000.0 1.0 6.0 0.0 7.0 6386.0 9645.0 73.1 0.0\n",
      "  0.0 157.0 248.0 4.0 4.0 0.0 4.0 7.0 22.0 7.0 0.0 5.0 9.0 6.0 7.0 25.0\n",
      "  11.0 18.0 9.0 22.0 0.0 0.0 0.0 5.0 100.0 33.3 0.0 0.0 147587.0 140492.0\n",
      "  30200.0 108587.0]]\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 99120 entries, 0 to 99119\n",
      "Data columns (total 81 columns):\n",
      "loan_amnt                         99120 non-null float64\n",
      "funded_amnt                       99120 non-null float64\n",
      "funded_amnt_inv                   99120 non-null float64\n",
      "int_rate                          99120 non-null float64\n",
      "installment                       99120 non-null float64\n",
      "emp_length                        99120 non-null int64\n",
      "home_ownership                    99120 non-null object\n",
      "annual_inc                        99120 non-null float64\n",
      "verification_status               99120 non-null object\n",
      "loan_status                       99120 non-null object\n",
      "pymnt_plan                        99120 non-null object\n",
      "dti                               99120 non-null float64\n",
      "delinq_2yrs                       99120 non-null float64\n",
      "inq_last_6mths                    99120 non-null float64\n",
      "mths_since_last_delinq            53366 non-null float64\n",
      "mths_since_last_record            19792 non-null float64\n",
      "open_acc                          99120 non-null float64\n",
      "pub_rec                           99120 non-null float64\n",
      "revol_bal                         99120 non-null float64\n",
      "total_acc                         99120 non-null float64\n",
      "initial_list_status               99120 non-null object\n",
      "collections_12_mths_ex_med        99120 non-null float64\n",
      "mths_since_last_major_derog       29372 non-null float64\n",
      "application_type                  99120 non-null object\n",
      "annual_inc_joint                  517 non-null float64\n",
      "dti_joint                         517 non-null float64\n",
      "acc_now_delinq                    99120 non-null float64\n",
      "tot_coll_amt                      99120 non-null float64\n",
      "tot_cur_bal                       99120 non-null float64\n",
      "open_acc_6m                       99120 non-null float64\n",
      "open_il_6m                        99120 non-null float64\n",
      "open_il_12m                       99120 non-null float64\n",
      "open_il_24m                       99120 non-null float64\n",
      "mths_since_rcnt_il                96469 non-null float64\n",
      "total_bal_il                      99120 non-null float64\n",
      "il_util                           85480 non-null float64\n",
      "open_rv_12m                       99120 non-null float64\n",
      "open_rv_24m                       99120 non-null float64\n",
      "max_bal_bc                        99120 non-null float64\n",
      "all_util                          99114 non-null float64\n",
      "total_rev_hi_lim                  99120 non-null float64\n",
      "inq_fi                            99120 non-null float64\n",
      "total_cu_tl                       99120 non-null float64\n",
      "inq_last_12m                      99120 non-null float64\n",
      "acc_open_past_24mths              99120 non-null float64\n",
      "avg_cur_bal                       99120 non-null float64\n",
      "bc_open_to_buy                    98010 non-null float64\n",
      "bc_util                           97971 non-null float64\n",
      "chargeoff_within_12_mths          99120 non-null float64\n",
      "delinq_amnt                       99120 non-null float64\n",
      "mo_sin_old_il_acct                96469 non-null float64\n",
      "mo_sin_old_rev_tl_op              99120 non-null float64\n",
      "mo_sin_rcnt_rev_tl_op             99120 non-null float64\n",
      "mo_sin_rcnt_tl                    99120 non-null float64\n",
      "mort_acc                          99120 non-null float64\n",
      "mths_since_recent_bc              98067 non-null float64\n",
      "mths_since_recent_bc_dlq          26018 non-null float64\n",
      "mths_since_recent_inq             89254 non-null float64\n",
      "mths_since_recent_revol_delinq    36606 non-null float64\n",
      "num_accts_ever_120_pd             99120 non-null float64\n",
      "num_actv_bc_tl                    99120 non-null float64\n",
      "num_actv_rev_tl                   99120 non-null float64\n",
      "num_bc_sats                       99120 non-null float64\n",
      "num_bc_tl                         99120 non-null float64\n",
      "num_il_tl                         99120 non-null float64\n",
      "num_op_rev_tl                     99120 non-null float64\n",
      "num_rev_accts                     99120 non-null float64\n",
      "num_rev_tl_bal_gt_0               99120 non-null float64\n",
      "num_sats                          99120 non-null float64\n",
      "num_tl_120dpd_2m                  95661 non-null float64\n",
      "num_tl_30dpd                      99120 non-null float64\n",
      "num_tl_90g_dpd_24m                99120 non-null float64\n",
      "num_tl_op_past_12m                99120 non-null float64\n",
      "pct_tl_nvr_dlq                    99120 non-null float64\n",
      "percent_bc_gt_75                  98006 non-null float64\n",
      "pub_rec_bankruptcies              99120 non-null float64\n",
      "tax_liens                         99120 non-null float64\n",
      "tot_hi_cred_lim                   99120 non-null float64\n",
      "total_bal_ex_mort                 99120 non-null float64\n",
      "total_bc_limit                    99120 non-null float64\n",
      "total_il_high_credit_limit        99120 non-null float64\n",
      "dtypes: float64(74), int64(1), object(6)\n",
      "memory usage: 62.0+ MB\n"
     ]
    }
   ],
   "source": [
    "print df.columns\n",
    "print df.head(1).values\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/ting/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.py:3834: RuntimeWarning: Invalid value encountered in percentile\n",
      "  RuntimeWarning)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "      <th>missing_pct</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>loan_amnt</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>14170.570521</td>\n",
       "      <td>8886.138758</td>\n",
       "      <td>1000.00</td>\n",
       "      <td>7200.00</td>\n",
       "      <td>12000.00</td>\n",
       "      <td>20000.00</td>\n",
       "      <td>40000.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>funded_amnt</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>14170.570521</td>\n",
       "      <td>8886.138758</td>\n",
       "      <td>1000.00</td>\n",
       "      <td>7200.00</td>\n",
       "      <td>12000.00</td>\n",
       "      <td>20000.00</td>\n",
       "      <td>40000.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>funded_amnt_inv</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>14166.087823</td>\n",
       "      <td>8883.301328</td>\n",
       "      <td>1000.00</td>\n",
       "      <td>7200.00</td>\n",
       "      <td>12000.00</td>\n",
       "      <td>20000.00</td>\n",
       "      <td>40000.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>int_rate</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>13.723641</td>\n",
       "      <td>4.873910</td>\n",
       "      <td>5.32</td>\n",
       "      <td>10.49</td>\n",
       "      <td>12.79</td>\n",
       "      <td>15.59</td>\n",
       "      <td>30.99</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>installment</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>432.718654</td>\n",
       "      <td>272.678596</td>\n",
       "      <td>30.12</td>\n",
       "      <td>235.24</td>\n",
       "      <td>361.38</td>\n",
       "      <td>569.83</td>\n",
       "      <td>1535.71</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>annual_inc</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>78488.850081</td>\n",
       "      <td>72694.186060</td>\n",
       "      <td>0.00</td>\n",
       "      <td>48000.00</td>\n",
       "      <td>65448.00</td>\n",
       "      <td>94000.00</td>\n",
       "      <td>8400000.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dti</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>18.348651</td>\n",
       "      <td>64.057603</td>\n",
       "      <td>0.00</td>\n",
       "      <td>11.91</td>\n",
       "      <td>17.60</td>\n",
       "      <td>23.90</td>\n",
       "      <td>9999.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>delinq_2yrs</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.381901</td>\n",
       "      <td>0.988996</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>21.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>inq_last_6mths</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.570521</td>\n",
       "      <td>0.863796</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>5.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mths_since_last_delinq</th>\n",
       "      <td>53366.0</td>\n",
       "      <td>33.229172</td>\n",
       "      <td>21.820407</td>\n",
       "      <td>0.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>142.00</td>\n",
       "      <td>0.461602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mths_since_last_record</th>\n",
       "      <td>19792.0</td>\n",
       "      <td>67.267886</td>\n",
       "      <td>24.379343</td>\n",
       "      <td>0.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>119.00</td>\n",
       "      <td>0.800323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>open_acc</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>11.718251</td>\n",
       "      <td>5.730585</td>\n",
       "      <td>1.00</td>\n",
       "      <td>8.00</td>\n",
       "      <td>11.00</td>\n",
       "      <td>15.00</td>\n",
       "      <td>86.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pub_rec</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.266596</td>\n",
       "      <td>0.719193</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>61.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>revol_bal</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>15536.628047</td>\n",
       "      <td>21537.790599</td>\n",
       "      <td>0.00</td>\n",
       "      <td>5657.00</td>\n",
       "      <td>10494.00</td>\n",
       "      <td>18501.50</td>\n",
       "      <td>876178.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>total_acc</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>24.033545</td>\n",
       "      <td>11.929761</td>\n",
       "      <td>2.00</td>\n",
       "      <td>15.00</td>\n",
       "      <td>22.00</td>\n",
       "      <td>31.00</td>\n",
       "      <td>119.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>collections_12_mths_ex_med</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.021640</td>\n",
       "      <td>0.168331</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>10.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mths_since_last_major_derog</th>\n",
       "      <td>29372.0</td>\n",
       "      <td>44.449612</td>\n",
       "      <td>22.254529</td>\n",
       "      <td>0.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>165.00</td>\n",
       "      <td>0.703672</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>annual_inc_joint</th>\n",
       "      <td>517.0</td>\n",
       "      <td>118120.418472</td>\n",
       "      <td>51131.323819</td>\n",
       "      <td>26943.12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>400000.00</td>\n",
       "      <td>0.994784</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dti_joint</th>\n",
       "      <td>517.0</td>\n",
       "      <td>18.637621</td>\n",
       "      <td>6.602016</td>\n",
       "      <td>2.56</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>48.58</td>\n",
       "      <td>0.994784</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>acc_now_delinq</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.006709</td>\n",
       "      <td>0.086902</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>4.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tot_coll_amt</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>281.797639</td>\n",
       "      <td>1840.699443</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>172575.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tot_cur_bal</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>138845.606144</td>\n",
       "      <td>156736.843591</td>\n",
       "      <td>0.00</td>\n",
       "      <td>28689.00</td>\n",
       "      <td>76447.50</td>\n",
       "      <td>207194.75</td>\n",
       "      <td>3764968.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>open_acc_6m</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.978743</td>\n",
       "      <td>1.176973</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.00</td>\n",
       "      <td>13.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>open_il_6m</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>2.825888</td>\n",
       "      <td>3.109225</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>43.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>open_il_12m</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.723467</td>\n",
       "      <td>0.973888</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>13.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>open_il_24m</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>1.624818</td>\n",
       "      <td>1.656628</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.00</td>\n",
       "      <td>26.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mths_since_rcnt_il</th>\n",
       "      <td>96469.0</td>\n",
       "      <td>21.362531</td>\n",
       "      <td>26.563455</td>\n",
       "      <td>0.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>503.00</td>\n",
       "      <td>0.026745</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>total_bal_il</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>35045.324193</td>\n",
       "      <td>41981.617996</td>\n",
       "      <td>0.00</td>\n",
       "      <td>9179.00</td>\n",
       "      <td>23199.00</td>\n",
       "      <td>45672.00</td>\n",
       "      <td>1547285.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>il_util</th>\n",
       "      <td>85480.0</td>\n",
       "      <td>71.599158</td>\n",
       "      <td>23.306731</td>\n",
       "      <td>0.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1000.00</td>\n",
       "      <td>0.137611</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>open_rv_12m</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>1.408142</td>\n",
       "      <td>1.570068</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.00</td>\n",
       "      <td>24.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mo_sin_old_rev_tl_op</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>177.634322</td>\n",
       "      <td>95.327498</td>\n",
       "      <td>3.00</td>\n",
       "      <td>115.00</td>\n",
       "      <td>160.00</td>\n",
       "      <td>227.00</td>\n",
       "      <td>901.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mo_sin_rcnt_rev_tl_op</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>13.145369</td>\n",
       "      <td>16.695022</td>\n",
       "      <td>0.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>8.00</td>\n",
       "      <td>16.00</td>\n",
       "      <td>274.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mo_sin_rcnt_tl</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>7.833232</td>\n",
       "      <td>8.649843</td>\n",
       "      <td>0.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>5.00</td>\n",
       "      <td>10.00</td>\n",
       "      <td>268.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mort_acc</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>1.467585</td>\n",
       "      <td>1.799513</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.00</td>\n",
       "      <td>45.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mths_since_recent_bc</th>\n",
       "      <td>98067.0</td>\n",
       "      <td>23.623512</td>\n",
       "      <td>31.750632</td>\n",
       "      <td>0.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>546.00</td>\n",
       "      <td>0.010623</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mths_since_recent_bc_dlq</th>\n",
       "      <td>26018.0</td>\n",
       "      <td>38.095280</td>\n",
       "      <td>22.798229</td>\n",
       "      <td>0.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>162.00</td>\n",
       "      <td>0.737510</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mths_since_recent_inq</th>\n",
       "      <td>89254.0</td>\n",
       "      <td>6.626504</td>\n",
       "      <td>5.967648</td>\n",
       "      <td>0.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>25.00</td>\n",
       "      <td>0.099536</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mths_since_recent_revol_delinq</th>\n",
       "      <td>36606.0</td>\n",
       "      <td>34.393132</td>\n",
       "      <td>22.371813</td>\n",
       "      <td>0.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>165.00</td>\n",
       "      <td>0.630690</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_accts_ever_120_pd</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.594703</td>\n",
       "      <td>1.508027</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>36.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_actv_bc_tl</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>3.628218</td>\n",
       "      <td>2.302668</td>\n",
       "      <td>0.00</td>\n",
       "      <td>2.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>5.00</td>\n",
       "      <td>47.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_actv_rev_tl</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>5.625272</td>\n",
       "      <td>3.400185</td>\n",
       "      <td>0.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>5.00</td>\n",
       "      <td>7.00</td>\n",
       "      <td>59.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_bc_sats</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>4.645581</td>\n",
       "      <td>3.013399</td>\n",
       "      <td>0.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>4.00</td>\n",
       "      <td>6.00</td>\n",
       "      <td>61.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_bc_tl</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>7.416041</td>\n",
       "      <td>4.546112</td>\n",
       "      <td>0.00</td>\n",
       "      <td>4.00</td>\n",
       "      <td>7.00</td>\n",
       "      <td>10.00</td>\n",
       "      <td>67.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_il_tl</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>8.597437</td>\n",
       "      <td>7.528533</td>\n",
       "      <td>0.00</td>\n",
       "      <td>4.00</td>\n",
       "      <td>7.00</td>\n",
       "      <td>11.00</td>\n",
       "      <td>107.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_op_rev_tl</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>8.198820</td>\n",
       "      <td>4.710348</td>\n",
       "      <td>0.00</td>\n",
       "      <td>5.00</td>\n",
       "      <td>7.00</td>\n",
       "      <td>10.00</td>\n",
       "      <td>79.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_rev_accts</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>13.726312</td>\n",
       "      <td>7.963791</td>\n",
       "      <td>2.00</td>\n",
       "      <td>8.00</td>\n",
       "      <td>12.00</td>\n",
       "      <td>18.00</td>\n",
       "      <td>104.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_rev_tl_bal_gt_0</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>5.566293</td>\n",
       "      <td>3.286135</td>\n",
       "      <td>0.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>5.00</td>\n",
       "      <td>7.00</td>\n",
       "      <td>59.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_sats</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>11.673497</td>\n",
       "      <td>5.709513</td>\n",
       "      <td>1.00</td>\n",
       "      <td>8.00</td>\n",
       "      <td>11.00</td>\n",
       "      <td>14.00</td>\n",
       "      <td>85.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_tl_120dpd_2m</th>\n",
       "      <td>95661.0</td>\n",
       "      <td>0.001108</td>\n",
       "      <td>0.035695</td>\n",
       "      <td>0.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.00</td>\n",
       "      <td>0.034897</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_tl_30dpd</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.004348</td>\n",
       "      <td>0.068650</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_tl_90g_dpd_24m</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.101332</td>\n",
       "      <td>0.567112</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>20.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>num_tl_op_past_12m</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>2.254752</td>\n",
       "      <td>1.960084</td>\n",
       "      <td>0.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>24.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pct_tl_nvr_dlq</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>93.262828</td>\n",
       "      <td>9.696646</td>\n",
       "      <td>0.00</td>\n",
       "      <td>90.00</td>\n",
       "      <td>96.90</td>\n",
       "      <td>100.00</td>\n",
       "      <td>100.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>percent_bc_gt_75</th>\n",
       "      <td>98006.0</td>\n",
       "      <td>42.681332</td>\n",
       "      <td>36.296425</td>\n",
       "      <td>0.00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.00</td>\n",
       "      <td>0.011239</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pub_rec_bankruptcies</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.150262</td>\n",
       "      <td>0.407706</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>8.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tax_liens</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>0.075393</td>\n",
       "      <td>0.517275</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.00</td>\n",
       "      <td>61.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tot_hi_cred_lim</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>172185.283394</td>\n",
       "      <td>175273.669652</td>\n",
       "      <td>2500.00</td>\n",
       "      <td>49130.75</td>\n",
       "      <td>108020.50</td>\n",
       "      <td>248473.25</td>\n",
       "      <td>3953111.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>total_bal_ex_mort</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>50818.694078</td>\n",
       "      <td>48976.640478</td>\n",
       "      <td>0.00</td>\n",
       "      <td>20913.00</td>\n",
       "      <td>37747.50</td>\n",
       "      <td>64216.25</td>\n",
       "      <td>1548128.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>total_bc_limit</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>20862.228420</td>\n",
       "      <td>20721.900664</td>\n",
       "      <td>0.00</td>\n",
       "      <td>7700.00</td>\n",
       "      <td>14700.00</td>\n",
       "      <td>27000.00</td>\n",
       "      <td>520500.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>total_il_high_credit_limit</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>44066.340375</td>\n",
       "      <td>44473.458730</td>\n",
       "      <td>0.00</td>\n",
       "      <td>15750.00</td>\n",
       "      <td>33183.00</td>\n",
       "      <td>58963.25</td>\n",
       "      <td>2000000.00</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>74 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  count           mean            std  \\\n",
       "loan_amnt                       99120.0   14170.570521    8886.138758   \n",
       "funded_amnt                     99120.0   14170.570521    8886.138758   \n",
       "funded_amnt_inv                 99120.0   14166.087823    8883.301328   \n",
       "int_rate                        99120.0      13.723641       4.873910   \n",
       "installment                     99120.0     432.718654     272.678596   \n",
       "annual_inc                      99120.0   78488.850081   72694.186060   \n",
       "dti                             99120.0      18.348651      64.057603   \n",
       "delinq_2yrs                     99120.0       0.381901       0.988996   \n",
       "inq_last_6mths                  99120.0       0.570521       0.863796   \n",
       "mths_since_last_delinq          53366.0      33.229172      21.820407   \n",
       "mths_since_last_record          19792.0      67.267886      24.379343   \n",
       "open_acc                        99120.0      11.718251       5.730585   \n",
       "pub_rec                         99120.0       0.266596       0.719193   \n",
       "revol_bal                       99120.0   15536.628047   21537.790599   \n",
       "total_acc                       99120.0      24.033545      11.929761   \n",
       "collections_12_mths_ex_med      99120.0       0.021640       0.168331   \n",
       "mths_since_last_major_derog     29372.0      44.449612      22.254529   \n",
       "annual_inc_joint                  517.0  118120.418472   51131.323819   \n",
       "dti_joint                         517.0      18.637621       6.602016   \n",
       "acc_now_delinq                  99120.0       0.006709       0.086902   \n",
       "tot_coll_amt                    99120.0     281.797639    1840.699443   \n",
       "tot_cur_bal                     99120.0  138845.606144  156736.843591   \n",
       "open_acc_6m                     99120.0       0.978743       1.176973   \n",
       "open_il_6m                      99120.0       2.825888       3.109225   \n",
       "open_il_12m                     99120.0       0.723467       0.973888   \n",
       "open_il_24m                     99120.0       1.624818       1.656628   \n",
       "mths_since_rcnt_il              96469.0      21.362531      26.563455   \n",
       "total_bal_il                    99120.0   35045.324193   41981.617996   \n",
       "il_util                         85480.0      71.599158      23.306731   \n",
       "open_rv_12m                     99120.0       1.408142       1.570068   \n",
       "...                                 ...            ...            ...   \n",
       "mo_sin_old_rev_tl_op            99120.0     177.634322      95.327498   \n",
       "mo_sin_rcnt_rev_tl_op           99120.0      13.145369      16.695022   \n",
       "mo_sin_rcnt_tl                  99120.0       7.833232       8.649843   \n",
       "mort_acc                        99120.0       1.467585       1.799513   \n",
       "mths_since_recent_bc            98067.0      23.623512      31.750632   \n",
       "mths_since_recent_bc_dlq        26018.0      38.095280      22.798229   \n",
       "mths_since_recent_inq           89254.0       6.626504       5.967648   \n",
       "mths_since_recent_revol_delinq  36606.0      34.393132      22.371813   \n",
       "num_accts_ever_120_pd           99120.0       0.594703       1.508027   \n",
       "num_actv_bc_tl                  99120.0       3.628218       2.302668   \n",
       "num_actv_rev_tl                 99120.0       5.625272       3.400185   \n",
       "num_bc_sats                     99120.0       4.645581       3.013399   \n",
       "num_bc_tl                       99120.0       7.416041       4.546112   \n",
       "num_il_tl                       99120.0       8.597437       7.528533   \n",
       "num_op_rev_tl                   99120.0       8.198820       4.710348   \n",
       "num_rev_accts                   99120.0      13.726312       7.963791   \n",
       "num_rev_tl_bal_gt_0             99120.0       5.566293       3.286135   \n",
       "num_sats                        99120.0      11.673497       5.709513   \n",
       "num_tl_120dpd_2m                95661.0       0.001108       0.035695   \n",
       "num_tl_30dpd                    99120.0       0.004348       0.068650   \n",
       "num_tl_90g_dpd_24m              99120.0       0.101332       0.567112   \n",
       "num_tl_op_past_12m              99120.0       2.254752       1.960084   \n",
       "pct_tl_nvr_dlq                  99120.0      93.262828       9.696646   \n",
       "percent_bc_gt_75                98006.0      42.681332      36.296425   \n",
       "pub_rec_bankruptcies            99120.0       0.150262       0.407706   \n",
       "tax_liens                       99120.0       0.075393       0.517275   \n",
       "tot_hi_cred_lim                 99120.0  172185.283394  175273.669652   \n",
       "total_bal_ex_mort               99120.0   50818.694078   48976.640478   \n",
       "total_bc_limit                  99120.0   20862.228420   20721.900664   \n",
       "total_il_high_credit_limit      99120.0   44066.340375   44473.458730   \n",
       "\n",
       "                                     min       25%        50%        75%  \\\n",
       "loan_amnt                        1000.00   7200.00   12000.00   20000.00   \n",
       "funded_amnt                      1000.00   7200.00   12000.00   20000.00   \n",
       "funded_amnt_inv                  1000.00   7200.00   12000.00   20000.00   \n",
       "int_rate                            5.32     10.49      12.79      15.59   \n",
       "installment                        30.12    235.24     361.38     569.83   \n",
       "annual_inc                          0.00  48000.00   65448.00   94000.00   \n",
       "dti                                 0.00     11.91      17.60      23.90   \n",
       "delinq_2yrs                         0.00      0.00       0.00       0.00   \n",
       "inq_last_6mths                      0.00      0.00       0.00       1.00   \n",
       "mths_since_last_delinq              0.00       NaN        NaN        NaN   \n",
       "mths_since_last_record              0.00       NaN        NaN        NaN   \n",
       "open_acc                            1.00      8.00      11.00      15.00   \n",
       "pub_rec                             0.00      0.00       0.00       0.00   \n",
       "revol_bal                           0.00   5657.00   10494.00   18501.50   \n",
       "total_acc                           2.00     15.00      22.00      31.00   \n",
       "collections_12_mths_ex_med          0.00      0.00       0.00       0.00   \n",
       "mths_since_last_major_derog         0.00       NaN        NaN        NaN   \n",
       "annual_inc_joint                26943.12       NaN        NaN        NaN   \n",
       "dti_joint                           2.56       NaN        NaN        NaN   \n",
       "acc_now_delinq                      0.00      0.00       0.00       0.00   \n",
       "tot_coll_amt                        0.00      0.00       0.00       0.00   \n",
       "tot_cur_bal                         0.00  28689.00   76447.50  207194.75   \n",
       "open_acc_6m                         0.00      0.00       1.00       2.00   \n",
       "open_il_6m                          0.00      1.00       2.00       3.00   \n",
       "open_il_12m                         0.00      0.00       0.00       1.00   \n",
       "open_il_24m                         0.00      0.00       1.00       2.00   \n",
       "mths_since_rcnt_il                  0.00       NaN        NaN        NaN   \n",
       "total_bal_il                        0.00   9179.00   23199.00   45672.00   \n",
       "il_util                             0.00       NaN        NaN        NaN   \n",
       "open_rv_12m                         0.00      0.00       1.00       2.00   \n",
       "...                                  ...       ...        ...        ...   \n",
       "mo_sin_old_rev_tl_op                3.00    115.00     160.00     227.00   \n",
       "mo_sin_rcnt_rev_tl_op               0.00      3.00       8.00      16.00   \n",
       "mo_sin_rcnt_tl                      0.00      3.00       5.00      10.00   \n",
       "mort_acc                            0.00      0.00       1.00       2.00   \n",
       "mths_since_recent_bc                0.00       NaN        NaN        NaN   \n",
       "mths_since_recent_bc_dlq            0.00       NaN        NaN        NaN   \n",
       "mths_since_recent_inq               0.00       NaN        NaN        NaN   \n",
       "mths_since_recent_revol_delinq      0.00       NaN        NaN        NaN   \n",
       "num_accts_ever_120_pd               0.00      0.00       0.00       1.00   \n",
       "num_actv_bc_tl                      0.00      2.00       3.00       5.00   \n",
       "num_actv_rev_tl                     0.00      3.00       5.00       7.00   \n",
       "num_bc_sats                         0.00      3.00       4.00       6.00   \n",
       "num_bc_tl                           0.00      4.00       7.00      10.00   \n",
       "num_il_tl                           0.00      4.00       7.00      11.00   \n",
       "num_op_rev_tl                       0.00      5.00       7.00      10.00   \n",
       "num_rev_accts                       2.00      8.00      12.00      18.00   \n",
       "num_rev_tl_bal_gt_0                 0.00      3.00       5.00       7.00   \n",
       "num_sats                            1.00      8.00      11.00      14.00   \n",
       "num_tl_120dpd_2m                    0.00       NaN        NaN        NaN   \n",
       "num_tl_30dpd                        0.00      0.00       0.00       0.00   \n",
       "num_tl_90g_dpd_24m                  0.00      0.00       0.00       0.00   \n",
       "num_tl_op_past_12m                  0.00      1.00       2.00       3.00   \n",
       "pct_tl_nvr_dlq                      0.00     90.00      96.90     100.00   \n",
       "percent_bc_gt_75                    0.00       NaN        NaN        NaN   \n",
       "pub_rec_bankruptcies                0.00      0.00       0.00       0.00   \n",
       "tax_liens                           0.00      0.00       0.00       0.00   \n",
       "tot_hi_cred_lim                  2500.00  49130.75  108020.50  248473.25   \n",
       "total_bal_ex_mort                   0.00  20913.00   37747.50   64216.25   \n",
       "total_bc_limit                      0.00   7700.00   14700.00   27000.00   \n",
       "total_il_high_credit_limit          0.00  15750.00   33183.00   58963.25   \n",
       "\n",
       "                                       max  missing_pct  \n",
       "loan_amnt                         40000.00     0.000000  \n",
       "funded_amnt                       40000.00     0.000000  \n",
       "funded_amnt_inv                   40000.00     0.000000  \n",
       "int_rate                             30.99     0.000000  \n",
       "installment                        1535.71     0.000000  \n",
       "annual_inc                      8400000.00     0.000000  \n",
       "dti                                9999.00     0.000000  \n",
       "delinq_2yrs                          21.00     0.000000  \n",
       "inq_last_6mths                        5.00     0.000000  \n",
       "mths_since_last_delinq              142.00     0.461602  \n",
       "mths_since_last_record              119.00     0.800323  \n",
       "open_acc                             86.00     0.000000  \n",
       "pub_rec                              61.00     0.000000  \n",
       "revol_bal                        876178.00     0.000000  \n",
       "total_acc                           119.00     0.000000  \n",
       "collections_12_mths_ex_med           10.00     0.000000  \n",
       "mths_since_last_major_derog         165.00     0.703672  \n",
       "annual_inc_joint                 400000.00     0.994784  \n",
       "dti_joint                            48.58     0.994784  \n",
       "acc_now_delinq                        4.00     0.000000  \n",
       "tot_coll_amt                     172575.00     0.000000  \n",
       "tot_cur_bal                     3764968.00     0.000000  \n",
       "open_acc_6m                          13.00     0.000000  \n",
       "open_il_6m                           43.00     0.000000  \n",
       "open_il_12m                          13.00     0.000000  \n",
       "open_il_24m                          26.00     0.000000  \n",
       "mths_since_rcnt_il                  503.00     0.026745  \n",
       "total_bal_il                    1547285.00     0.000000  \n",
       "il_util                            1000.00     0.137611  \n",
       "open_rv_12m                          24.00     0.000000  \n",
       "...                                    ...          ...  \n",
       "mo_sin_old_rev_tl_op                901.00     0.000000  \n",
       "mo_sin_rcnt_rev_tl_op               274.00     0.000000  \n",
       "mo_sin_rcnt_tl                      268.00     0.000000  \n",
       "mort_acc                             45.00     0.000000  \n",
       "mths_since_recent_bc                546.00     0.010623  \n",
       "mths_since_recent_bc_dlq            162.00     0.737510  \n",
       "mths_since_recent_inq                25.00     0.099536  \n",
       "mths_since_recent_revol_delinq      165.00     0.630690  \n",
       "num_accts_ever_120_pd                36.00     0.000000  \n",
       "num_actv_bc_tl                       47.00     0.000000  \n",
       "num_actv_rev_tl                      59.00     0.000000  \n",
       "num_bc_sats                          61.00     0.000000  \n",
       "num_bc_tl                            67.00     0.000000  \n",
       "num_il_tl                           107.00     0.000000  \n",
       "num_op_rev_tl                        79.00     0.000000  \n",
       "num_rev_accts                       104.00     0.000000  \n",
       "num_rev_tl_bal_gt_0                  59.00     0.000000  \n",
       "num_sats                             85.00     0.000000  \n",
       "num_tl_120dpd_2m                      4.00     0.034897  \n",
       "num_tl_30dpd                          3.00     0.000000  \n",
       "num_tl_90g_dpd_24m                   20.00     0.000000  \n",
       "num_tl_op_past_12m                   24.00     0.000000  \n",
       "pct_tl_nvr_dlq                      100.00     0.000000  \n",
       "percent_bc_gt_75                    100.00     0.011239  \n",
       "pub_rec_bankruptcies                  8.00     0.000000  \n",
       "tax_liens                            61.00     0.000000  \n",
       "tot_hi_cred_lim                 3953111.00     0.000000  \n",
       "total_bal_ex_mort               1548128.00     0.000000  \n",
       "total_bc_limit                   520500.00     0.000000  \n",
       "total_il_high_credit_limit      2000000.00     0.000000  \n",
       "\n",
       "[74 rows x 9 columns]"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.select_dtypes(include=['float']).describe().T.\\\n",
    "assign(missing_pct=df.apply(lambda x : (len(x)-x.count())/float(len(x))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.drop('annual_inc_joint',1,inplace=True)\n",
    "df.drop('dti_joint',1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "      <th>missing_pct</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>emp_length</th>\n",
       "      <td>99120.0</td>\n",
       "      <td>5.757092</td>\n",
       "      <td>3.770359</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              count      mean       std  min  25%  50%   75%   max  \\\n",
       "emp_length  99120.0  5.757092  3.770359  0.0  2.0  6.0  10.0  10.0   \n",
       "\n",
       "            missing_pct  \n",
       "emp_length          0.0  "
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.select_dtypes(include=['int']).describe().T.\\\n",
    "assign(missing_pct=df.apply(lambda x : (len(x)-x.count())/float(len(x))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Target: Loan Statuses"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Current               79445\n",
       "Fully Paid            13066\n",
       "Charged Off            2502\n",
       "Late (31-120 days)     2245\n",
       "In Grace Period        1407\n",
       "Late (16-30 days)       454\n",
       "Default                   1\n",
       "Name: loan_status, dtype: int64"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['loan_status'].value_counts()\n",
    "# .plot(kind='bar')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0    92511\n",
       "0.0     2699\n",
       "Name: loan_status, dtype: int64"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loan_status.replace('Fully Paid', int(1),inplace=True)\n",
    "df.loan_status.replace('Current', int(1),inplace=True)\n",
    "df.loan_status.replace('Late (16-30 days)', int(0),inplace=True)\n",
    "df.loan_status.replace('Late (31-120 days)', int(0),inplace=True)\n",
    "df.loan_status.replace('Charged Off', np.nan,inplace=True)\n",
    "df.loan_status.replace('In Grace Period', np.nan,inplace=True)\n",
    "df.loan_status.replace('Default', np.nan,inplace=True)\n",
    "# df.loan_status.astype('int')\n",
    "df.loan_status.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# df.loan_status\n",
    "df.dropna(subset=['loan_status'],inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Highly Correlated Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "funded_amnt                     loan_amnt                      1.000000\n",
       "funded_amnt_inv                 loan_amnt                      0.999994\n",
       "                                funded_amnt                    0.999994\n",
       "installment                     loan_amnt                      0.953380\n",
       "                                funded_amnt                    0.953380\n",
       "                                funded_amnt_inv                0.953293\n",
       "mths_since_last_delinq          delinq_2yrs                   -0.551275\n",
       "total_acc                       open_acc                       0.722950\n",
       "mths_since_last_major_derog     mths_since_last_delinq         0.685642\n",
       "open_il_24m                     open_il_12m                    0.760219\n",
       "total_bal_il                    open_il_6m                     0.566551\n",
       "open_rv_12m                     open_acc_6m                    0.623975\n",
       "open_rv_24m                     open_rv_12m                    0.774954\n",
       "max_bal_bc                      revol_bal                      0.551409\n",
       "all_util                        il_util                        0.594925\n",
       "total_rev_hi_lim                revol_bal                      0.815351\n",
       "inq_last_12m                    inq_fi                         0.563011\n",
       "acc_open_past_24mths            open_acc_6m                    0.553181\n",
       "                                open_il_24m                    0.570853\n",
       "                                open_rv_12m                    0.657606\n",
       "                                open_rv_24m                    0.848964\n",
       "avg_cur_bal                     tot_cur_bal                    0.828457\n",
       "bc_open_to_buy                  total_rev_hi_lim               0.626380\n",
       "bc_util                         all_util                       0.569469\n",
       "mo_sin_rcnt_tl                  mo_sin_rcnt_rev_tl_op          0.606065\n",
       "mort_acc                        tot_cur_bal                    0.551198\n",
       "mths_since_recent_bc            mo_sin_rcnt_rev_tl_op          0.614262\n",
       "mths_since_recent_bc_dlq        mths_since_last_delinq         0.751613\n",
       "                                mths_since_last_major_derog    0.553022\n",
       "mths_since_recent_revol_delinq  mths_since_last_delinq         0.853573\n",
       "                                                                 ...   \n",
       "num_sats                        total_acc                      0.720022\n",
       "                                num_actv_bc_tl                 0.552957\n",
       "                                num_actv_rev_tl                0.665429\n",
       "                                num_bc_sats                    0.630778\n",
       "                                num_op_rev_tl                  0.826946\n",
       "                                num_rev_accts                  0.663595\n",
       "                                num_rev_tl_bal_gt_0            0.668573\n",
       "num_tl_30dpd                    acc_now_delinq                 0.801444\n",
       "num_tl_90g_dpd_24m              delinq_2yrs                    0.669267\n",
       "num_tl_op_past_12m              open_acc_6m                    0.722131\n",
       "                                open_il_12m                    0.557902\n",
       "                                open_rv_12m                    0.844841\n",
       "                                open_rv_24m                    0.660265\n",
       "                                acc_open_past_24mths           0.774867\n",
       "pct_tl_nvr_dlq                  num_accts_ever_120_pd         -0.592502\n",
       "percent_bc_gt_75                bc_util                        0.844108\n",
       "pub_rec_bankruptcies            pub_rec                        0.580798\n",
       "tax_liens                       pub_rec                        0.752084\n",
       "tot_hi_cred_lim                 tot_cur_bal                    0.982693\n",
       "                                avg_cur_bal                    0.795652\n",
       "                                mort_acc                       0.560840\n",
       "total_bal_ex_mort               total_bal_il                   0.902486\n",
       "total_bc_limit                  max_bal_bc                     0.581536\n",
       "                                total_rev_hi_lim               0.775151\n",
       "                                bc_open_to_buy                 0.834159\n",
       "                                num_bc_sats                    0.633461\n",
       "total_il_high_credit_limit      open_il_6m                     0.552023\n",
       "                                total_bal_il                   0.960349\n",
       "                                num_il_tl                      0.583329\n",
       "                                total_bal_ex_mort              0.889238\n",
       "dtype: float64"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cor = df.corr()\n",
    "cor.loc[:,:] = np.tril(cor, k=-1) # below main lower triangle of an array\n",
    "cor = cor.stack()\n",
    "cor[(cor > 0.55) | (cor < -0.55)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df.drop(['funded_amnt','funded_amnt_inv', 'installment'], axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Our Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn import ensemble\n",
    "from sklearn.preprocessing import OneHotEncoder #https://ljalphabeta.gitbooks.io/python-/content/categorical_data.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "Y = df.loan_status\n",
    "X = df.drop('loan_status',1,inplace=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(95210,)\n",
      "92511.0\n"
     ]
    }
   ],
   "source": [
    "print Y.shape\n",
    "print sum(Y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X = pd.get_dummies(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index([u'loan_amnt', u'int_rate', u'emp_length', u'annual_inc', u'dti',\n",
      "       u'delinq_2yrs', u'inq_last_6mths', u'mths_since_last_delinq',\n",
      "       u'mths_since_last_record', u'open_acc', u'pub_rec', u'revol_bal',\n",
      "       u'total_acc', u'collections_12_mths_ex_med',\n",
      "       u'mths_since_last_major_derog', u'acc_now_delinq', u'tot_coll_amt',\n",
      "       u'tot_cur_bal', u'open_acc_6m', u'open_il_6m', u'open_il_12m',\n",
      "       u'open_il_24m', u'mths_since_rcnt_il', u'total_bal_il', u'il_util',\n",
      "       u'open_rv_12m', u'open_rv_24m', u'max_bal_bc', u'all_util',\n",
      "       u'total_rev_hi_lim', u'inq_fi', u'total_cu_tl', u'inq_last_12m',\n",
      "       u'acc_open_past_24mths', u'avg_cur_bal', u'bc_open_to_buy', u'bc_util',\n",
      "       u'chargeoff_within_12_mths', u'delinq_amnt', u'mo_sin_old_il_acct',\n",
      "       u'mo_sin_old_rev_tl_op', u'mo_sin_rcnt_rev_tl_op', u'mo_sin_rcnt_tl',\n",
      "       u'mort_acc', u'mths_since_recent_bc', u'mths_since_recent_bc_dlq',\n",
      "       u'mths_since_recent_inq', u'mths_since_recent_revol_delinq',\n",
      "       u'num_accts_ever_120_pd', u'num_actv_bc_tl', u'num_actv_rev_tl',\n",
      "       u'num_bc_sats', u'num_bc_tl', u'num_il_tl', u'num_op_rev_tl',\n",
      "       u'num_rev_accts', u'num_rev_tl_bal_gt_0', u'num_sats',\n",
      "       u'num_tl_120dpd_2m', u'num_tl_30dpd', u'num_tl_90g_dpd_24m',\n",
      "       u'num_tl_op_past_12m', u'pct_tl_nvr_dlq', u'percent_bc_gt_75',\n",
      "       u'pub_rec_bankruptcies', u'tax_liens', u'tot_hi_cred_lim',\n",
      "       u'total_bal_ex_mort', u'total_bc_limit', u'total_il_high_credit_limit',\n",
      "       u'home_ownership_ANY', u'home_ownership_MORTGAGE',\n",
      "       u'home_ownership_OWN', u'home_ownership_RENT',\n",
      "       u'verification_status_Not Verified',\n",
      "       u'verification_status_Source Verified', u'verification_status_Verified',\n",
      "       u'pymnt_plan_n', u'pymnt_plan_y', u'initial_list_status_f',\n",
      "       u'initial_list_status_w', u'application_type_DIRECT_PAY',\n",
      "       u'application_type_INDIVIDUAL', u'application_type_JOINT'],\n",
      "      dtype='object')\n",
      "[[  1.50000000e+04   1.39900000e+01   2.00000000e+00   5.50000000e+04\n",
      "    2.37800000e+01   1.00000000e+00   0.00000000e+00   7.00000000e+00\n",
      "               nan   2.20000000e+01   0.00000000e+00   2.13450000e+04\n",
      "    4.30000000e+01   0.00000000e+00              nan   0.00000000e+00\n",
      "    0.00000000e+00   1.40492000e+05   3.00000000e+00   1.00000000e+01\n",
      "    2.00000000e+00   3.00000000e+00   1.10000000e+01   1.19147000e+05\n",
      "    1.01000000e+02   3.00000000e+00   4.00000000e+00   1.46120000e+04\n",
      "    8.30000000e+01   3.90000000e+04   1.00000000e+00   6.00000000e+00\n",
      "    0.00000000e+00   7.00000000e+00   6.38600000e+03   9.64500000e+03\n",
      "    7.31000000e+01   0.00000000e+00   0.00000000e+00   1.57000000e+02\n",
      "    2.48000000e+02   4.00000000e+00   4.00000000e+00   0.00000000e+00\n",
      "    4.00000000e+00   7.00000000e+00   2.20000000e+01   7.00000000e+00\n",
      "    0.00000000e+00   5.00000000e+00   9.00000000e+00   6.00000000e+00\n",
      "    7.00000000e+00   2.50000000e+01   1.10000000e+01   1.80000000e+01\n",
      "    9.00000000e+00   2.20000000e+01   0.00000000e+00   0.00000000e+00\n",
      "    0.00000000e+00   5.00000000e+00   1.00000000e+02   3.33000000e+01\n",
      "    0.00000000e+00   0.00000000e+00   1.47587000e+05   1.40492000e+05\n",
      "    3.02000000e+04   1.08587000e+05   0.00000000e+00   0.00000000e+00\n",
      "    0.00000000e+00   1.00000000e+00   1.00000000e+00   0.00000000e+00\n",
      "    0.00000000e+00   1.00000000e+00   0.00000000e+00   1.00000000e+00\n",
      "    0.00000000e+00   0.00000000e+00   1.00000000e+00   0.00000000e+00]]\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 95210 entries, 0 to 99119\n",
      "Data columns (total 84 columns):\n",
      "loan_amnt                              95210 non-null float64\n",
      "int_rate                               95210 non-null float64\n",
      "emp_length                             95210 non-null int64\n",
      "annual_inc                             95210 non-null float64\n",
      "dti                                    95210 non-null float64\n",
      "delinq_2yrs                            95210 non-null float64\n",
      "inq_last_6mths                         95210 non-null float64\n",
      "mths_since_last_delinq                 51229 non-null float64\n",
      "mths_since_last_record                 18903 non-null float64\n",
      "open_acc                               95210 non-null float64\n",
      "pub_rec                                95210 non-null float64\n",
      "revol_bal                              95210 non-null float64\n",
      "total_acc                              95210 non-null float64\n",
      "collections_12_mths_ex_med             95210 non-null float64\n",
      "mths_since_last_major_derog            28125 non-null float64\n",
      "acc_now_delinq                         95210 non-null float64\n",
      "tot_coll_amt                           95210 non-null float64\n",
      "tot_cur_bal                            95210 non-null float64\n",
      "open_acc_6m                            95210 non-null float64\n",
      "open_il_6m                             95210 non-null float64\n",
      "open_il_12m                            95210 non-null float64\n",
      "open_il_24m                            95210 non-null float64\n",
      "mths_since_rcnt_il                     92660 non-null float64\n",
      "total_bal_il                           95210 non-null float64\n",
      "il_util                                82017 non-null float64\n",
      "open_rv_12m                            95210 non-null float64\n",
      "open_rv_24m                            95210 non-null float64\n",
      "max_bal_bc                             95210 non-null float64\n",
      "all_util                               95204 non-null float64\n",
      "total_rev_hi_lim                       95210 non-null float64\n",
      "inq_fi                                 95210 non-null float64\n",
      "total_cu_tl                            95210 non-null float64\n",
      "inq_last_12m                           95210 non-null float64\n",
      "acc_open_past_24mths                   95210 non-null float64\n",
      "avg_cur_bal                            95210 non-null float64\n",
      "bc_open_to_buy                         94160 non-null float64\n",
      "bc_util                                94126 non-null float64\n",
      "chargeoff_within_12_mths               95210 non-null float64\n",
      "delinq_amnt                            95210 non-null float64\n",
      "mo_sin_old_il_acct                     92660 non-null float64\n",
      "mo_sin_old_rev_tl_op                   95210 non-null float64\n",
      "mo_sin_rcnt_rev_tl_op                  95210 non-null float64\n",
      "mo_sin_rcnt_tl                         95210 non-null float64\n",
      "mort_acc                               95210 non-null float64\n",
      "mths_since_recent_bc                   94212 non-null float64\n",
      "mths_since_recent_bc_dlq               24968 non-null float64\n",
      "mths_since_recent_inq                  85581 non-null float64\n",
      "mths_since_recent_revol_delinq         35158 non-null float64\n",
      "num_accts_ever_120_pd                  95210 non-null float64\n",
      "num_actv_bc_tl                         95210 non-null float64\n",
      "num_actv_rev_tl                        95210 non-null float64\n",
      "num_bc_sats                            95210 non-null float64\n",
      "num_bc_tl                              95210 non-null float64\n",
      "num_il_tl                              95210 non-null float64\n",
      "num_op_rev_tl                          95210 non-null float64\n",
      "num_rev_accts                          95210 non-null float64\n",
      "num_rev_tl_bal_gt_0                    95210 non-null float64\n",
      "num_sats                               95210 non-null float64\n",
      "num_tl_120dpd_2m                       91951 non-null float64\n",
      "num_tl_30dpd                           95210 non-null float64\n",
      "num_tl_90g_dpd_24m                     95210 non-null float64\n",
      "num_tl_op_past_12m                     95210 non-null float64\n",
      "pct_tl_nvr_dlq                         95210 non-null float64\n",
      "percent_bc_gt_75                       94156 non-null float64\n",
      "pub_rec_bankruptcies                   95210 non-null float64\n",
      "tax_liens                              95210 non-null float64\n",
      "tot_hi_cred_lim                        95210 non-null float64\n",
      "total_bal_ex_mort                      95210 non-null float64\n",
      "total_bc_limit                         95210 non-null float64\n",
      "total_il_high_credit_limit             95210 non-null float64\n",
      "home_ownership_ANY                     95210 non-null float64\n",
      "home_ownership_MORTGAGE                95210 non-null float64\n",
      "home_ownership_OWN                     95210 non-null float64\n",
      "home_ownership_RENT                    95210 non-null float64\n",
      "verification_status_Not Verified       95210 non-null float64\n",
      "verification_status_Source Verified    95210 non-null float64\n",
      "verification_status_Verified           95210 non-null float64\n",
      "pymnt_plan_n                           95210 non-null float64\n",
      "pymnt_plan_y                           95210 non-null float64\n",
      "initial_list_status_f                  95210 non-null float64\n",
      "initial_list_status_w                  95210 non-null float64\n",
      "application_type_DIRECT_PAY            95210 non-null float64\n",
      "application_type_INDIVIDUAL            95210 non-null float64\n",
      "application_type_JOINT                 95210 non-null float64\n",
      "dtypes: float64(83), int64(1)\n",
      "memory usage: 61.7 MB\n"
     ]
    }
   ],
   "source": [
    "# Inspect the design matrix: column names, the first row as raw values,\n",
    "# then dtypes and non-null counts per column.\n",
    "print(X.columns)\n",
    "print(X.head(1).values)\n",
    "X.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Impute every remaining missing value with 0. A single pass is enough:\n",
    "# the former second fillna call had nothing left to fill.\n",
    "# NOTE(review): 0 is also a legitimate value for the mths_since_* columns, so\n",
    "# 'never happened' and '0 months ago' become indistinguishable -- confirm this\n",
    "# is acceptable for the model.\n",
    "X.fillna(0.0, inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train Data & Test Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Hold out 30% of the rows for testing; the seed is fixed so the split is repeatable.\n",
    "x_train, x_test, y_train, y_test = train_test_split(\n",
    "    X, Y,\n",
    "    test_size=0.3,\n",
    "    random_state=123\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(66647, 84)\n",
      "(66647,)\n",
      "(28563, 84)\n",
      "(28563,)\n"
     ]
    }
   ],
   "source": [
    "# Shapes of the splits, in the order x_train, y_train, x_test, y_test.\n",
    "for split_part in (x_train, y_train, x_test, y_test):\n",
    "    print(split_part.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0    64712\n",
      "0.0     1935\n",
      "Name: loan_status, dtype: int64\n",
      "1.0    27799\n",
      "0.0      764\n",
      "Name: loan_status, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Class balance of the target in the train split, then in the test split.\n",
    "for target in (y_train, y_test):\n",
    "    print(target.value_counts())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Gradient Boosting Regression Tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Exhaustive grid, kept for reference only (slow to run):\n",
    "# param_grid = {'learning_rate': [0.1, 0.05, 0.02, 0.01],\n",
    "#               'max_depth': [1,2,3,4],\n",
    "#               'min_samples_split': [50,100,200,400],\n",
    "#               'n_estimators': [100,200,400,800]\n",
    "#               }\n",
    "\n",
    "# Reduced grid that is actually searched.\n",
    "param_grid = {'learning_rate': [0.1],\n",
    "              'max_depth': [2],\n",
    "              'min_samples_split': [50,100],\n",
    "              'n_estimators': [100,200]\n",
    "              }\n",
    "\n",
    "# Seed the estimator (same seed as the hand-fitted models below) so that\n",
    "# re-running the search selects the same parameters.\n",
    "est = GridSearchCV(ensemble.GradientBoostingRegressor(random_state=0),\n",
    "                   param_grid, n_jobs=4, refit=True)\n",
    "\n",
    "est.fit(x_train, y_train)\n",
    "\n",
    "best_params = est.best_params_\n",
    "print(best_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'min_samples_split': 100, 'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 3}\n"
     ]
    }
   ],
   "source": [
    "# Parameters selected by the grid search above.\n",
    "print(best_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 24.2 s, sys: 251 ms, total: 24.4 s\n",
      "Wall time: 25.6 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Hand-picked model: 300 boosted depth-1 trees, fitted on the training split.\n",
    "est = (\n",
    "    ensemble.GradientBoostingRegressor(\n",
    "        min_samples_split=50, n_estimators=300, learning_rate=0.1,\n",
    "        max_depth=1, random_state=0, loss='ls')\n",
    "    .fit(x_train, y_train)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.028311715416075908"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Score of the fitted model on the held-out split.\n",
    "est.score(X=x_test, y=y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 20 s, sys: 272 ms, total: 20.3 s\n",
      "Wall time: 21.6 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Hand-picked model: 100 boosted depth-2 trees, fitted on the training split.\n",
    "est = (\n",
    "    ensemble.GradientBoostingRegressor(\n",
    "        min_samples_split=50, n_estimators=100, learning_rate=0.1,\n",
    "        max_depth=2, random_state=0, loss='ls')\n",
    "    .fit(x_train, y_train)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.029210266192750467"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Score of the fitted model on the held-out split.\n",
    "est.score(X=x_test, y=y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def compute_ks(data, bad_label=0):\n",
    "    \"\"\"Return the Kolmogorov-Smirnov statistic of the scores in ``data``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pandas.DataFrame\n",
    "        Must contain a 'predict' column (model score) and a 'label' column\n",
    "        (class of each row). Rows whose 'predict' is missing are ignored.\n",
    "    bad_label : scalar, optional\n",
    "        Value of 'label' marking the 'bad' class; every other row counts as\n",
    "        'good'. Defaults to 0, matching the 0/1 target used in this notebook\n",
    "        (the previous hard-coded value 3 never occurs in that target, which\n",
    "        made the statistic always 0.0).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        Largest absolute gap between the cumulative share of bad rows and the\n",
    "        cumulative share of good rows over all score thresholds, in [0, 1].\n",
    "        0.0 is returned when one of the two classes is absent.\n",
    "    \"\"\"\n",
    "    scored = data[data['predict'].notnull()]\n",
    "    is_bad = (scored['label'] == bad_label).astype(float)\n",
    "\n",
    "    total_bad = is_bad.sum()\n",
    "    total_good = len(is_bad) - total_bad\n",
    "    if total_bad == 0 or total_good == 0:\n",
    "        # Only one class present: there are no two distributions to compare.\n",
    "        return 0.0\n",
    "\n",
    "    # Aggregate per distinct score (groupby returns keys in ascending order) so\n",
    "    # rows tied on 'predict' are counted together rather than in arbitrary order.\n",
    "    per_score = is_bad.groupby(scored['predict'])\n",
    "    bad_at_score = per_score.sum()\n",
    "    good_at_score = per_score.count() - bad_at_score\n",
    "\n",
    "    cdf_gap = bad_at_score.cumsum() / total_bad - good_at_score.cumsum() / total_good\n",
    "    return float(cdf_gap.abs().max())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0\n"
     ]
    }
   ],
   "source": [
    "# Pair each prediction with its true label by position. y_test keeps the\n",
    "# shuffled row labels of the original frame, so assigning it to a frame that\n",
    "# has a fresh 0..n-1 index would align on index and mismatch / NaN the labels.\n",
    "test_pd = pd.DataFrame({'predict': est.predict(x_test),\n",
    "                        'label': y_test.values})\n",
    "print(compute_ks(test_pd[['label','predict']]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdcAAAEZCAYAAADFZm8FAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XeYXVXZ/vHvISYMSWZGMUNXpHkjCdIEBJEmKKAIiKig\nLxGlioVmgR9NfMVeKCIoRSIKIoqASpUSAiZBmtQHhFeN1AyaYVImJJPz+2Ot8RyG6bOn35/ryjVn\n9tl7rbWfzJz7rL33nF0ql8uYmZlZcVYa6gGYmZmNNg5XMzOzgjlczczMCuZwNTMzK5jD1czMrGAO\nVzMzs4K9bqgHYDbaSDoL2DF/uwnwNNAClIHtImJpP9v/LfAWoARsBjwErADmR8R7+9N2bn9lYAkw\nOSIW97e9XvZ9FNASEZf0cfuVgJuBvavHLqkeeBbYPiIebLfNTcBvI+L8XvRzE3BERPxfF+tcDtwZ\nEee1Wz5k9bXB43A1K1hEfKHtsaSngYMi4v4C2/9Qbnsc8AqwQ0Q0F9V+NlR/AL8jcGc/tp8A7NJ+\nYUQ0SfoF8Cmg+v9nfeAdwL696aSANzH+gIFRzuFqNrBK+d9/SdoJ+BawMikcT46ImyV9GtgfGA+s\nBcwDDo6IF3vZ/o7AmUANsBw4NSJuknQEsBspgNYHFgOfiIgnO2tckoDfAXOArXNfXwKOAjYG7oqI\ng/N61wFzgWmkmfRRETFH0gTgLODdQCtwN3BcRCyR9BxwG2kG/i3gfcC7JLUANwAXAG/I9XgKOCAi\nFuTtzgPeC6wD/DwiTgUuzkO/W9JuEdFYtTs/Bm6SdEJELMvLDs3bLpa0Vjf9tY3zhNzPe4DI+7YV\nUJf3+5CI+Etuf1dJ04FJwB+BL3dQ4yPzOErAi8DnIuJvknYFvlm16hkR8ftO/qtsmPE5V7NBJGkK\n8CtS8GwBfBr4paR18irbA4dHxFTS4d6zetl+AykgPhIR7wAOAC6WtHpeZSfg0IjYFHgQOK4nzZIC\naCowG/gO6U3ApsCekjbP620IXBURmwNn5P0E+BpQm9ffPD/+elX7f4mIqRExA7gR+GZEXAx8HLgl\nInYANiCFz4FV242PiHfnffqypDWAQ6gcfq8OVvLRgyeA/XKtXgd8Ejg3r9Jdf/fmcV5PZea5A1Af\nEdvl+lwFfLFqmzVIbyq2At4FHPyqwkrvzbXcPiK2Ir1haKvbGcD/RsQ2pDczr5mR2/DlcDUbXNsB\nj7UdJo6Ih0mBtVN+/oaq83g/Jc3MemMHYE3g95LuB64lzV6n5efnRMT8/Pg+YNUetNkcEX/Kj58C\nZkVESz5f+HxVG89GxO/yfv0OWEXSJsAewI8johwRZVKY7VnV/qyOOo2I7wAPSDqOFDoCJletck1e\n75/Af0gzzjavms1X+THpDQ3APsDDbTP3HvRXfbi6lLe5AzhT0lGSvpfbrN7m0oh4JZ9n/wWwe7vx\nvJ90BGB2/v/6GrC6pFWAK4CLJM0gnbs/tZN9smHI4Wo2uDr6nRtHOhQMKQirl7d20177c3fjgPsj\nYsuI2CLPjrcH7sjPL2m3bWchVK39BVjLOlzr1WNvG8tyXrvP1fsLsLCjxvKFYScCz5FC8bZ24+1s\nX7rapyuBt0t6M+lQbNustSf9vWackj5EOmzeCvwGuLDdNtX/fyVeW7txwIVt/1/AFsA2EbEkIs4l\nHYa+Fdgb+GsOXRsBHK5mg+vPwFRJWwJI2pQUfrfn53evOoR7OOk8ZlfaB8ldwGaSts3tb006FDql\nl+MsdfK4K2/O55ORdADQGBFPkA71HiVppXwR1lHATZ20sZxK8L4X+F5EXA4sIJ3jHNfNGJaTgnZ8\nR09GxCvAJcDxwEa8ur596W930qHwnwAPkGau1dscJOl1kiaSDgn/MS9vq+mNwCfy4XyAY9vWkfQX\nYOOI+BnpZ2E10nldGwEcrmYD61Uzy3
xx0keB8yU9CFxKuqjo73mVf5HOwT5Kuqimu3Oi7dt/DvgI\ncI6kB0iHlg+IiOf7Me6urmytfu5l4Mi8X18APpSXnwYsAv4KPEKaCX+xg+0hBcuXJB2TtztP0j3A\n5aTZ94adbFcGiIhW4PfAPZI2pGMXAEcCP8mHqdv0uj/gR8D78yHdmcDjpPO1beaR3lDdA/whIn7d\nbrzXkWbPt+a67Q18OK9zPPBdSfeR/rzoSxHxQif7ZMNMybecMxse8tXC72/7U5uRJF8tPCsiGrpd\n2WwM8MzVzIrid+pmmWeuZmZmBfPM1czMrGAOVzMzs4L54w/HgHK5XC6VevrXFGZmlvX5hdPhOgaU\nSiXmzy/6c91HpoaGWtcicy0qXIsK16KioaG2z9v6sLCZmVnBPHMdA5qamnj5Zb8TBZgwYYVrkbkW\nFa5FhWtR0Z+Zq8N1DJgxt5nWjj8NbsyZ/AIsXORagGtRzbWocC2SlsXNfGWD7tfrjMN1DFhlUi2t\npZWHehjDwsTJNa5F5lpUuBYVrkUxfM7VzMysYA5XMzOzgjlczczMCuZwNTMzK5jD1czMrGAOVzMz\ns4I5XM3MzArmcDUzMyuYw9XMzKxgDlczM7OCOVzNzMwK5nA1MzMrmMO1AJLeJ+nQTp57g6QDC+hj\nX0lr9LcdMzMbeL4rTgEi4sYunt4M+CBweT+7+QLwKPB8P9sxM7MB5nAtgKTpwB7AusA8YANgTkQc\nDZwEvF3SoRFxYSfb/4MUnI8CFwPfJx1VmAIcBawKbA7MkLRDXnYgsAK4IiLOHcDdMzOzXnK4Fmsj\nYDegBXha0leBrwNHdBas2drAZhGxQNJHgOMi4pF8OPmQiDhC0gPA4bmPjwDvAkrAzZJujIgnO2t8\nyaJmWmkpZAdHunHlpSxZ5FqAa1HNtahwLZKWxc3A6n3e3uFarL9FxGIASc8CNT3crjEiFuTHzwCn\nSloM1AFNVeuVgGmkGfKf8vevJwVup+F68Da1vdmHMaCn/y1jgWtR4VpUuBb9rYHDtVjlqsel/HUF\nMK4X250NHBQRIel0UpBWtxPAwxGxF4CkY4C/dtV4fX098+c392gHRruGhlrXInMtKlyLCteiGL5a\nuDjlTr5/Cpgm6fM93PbnwFWS7iDNSNfKy+8GLgX+CdwqaZake4ANSbNdMzMbJkrlcvtMsNGmqamp\n3Njod6IAU6bU4lokI6kWtbV1lEql7lfsI8/WKlyLioaG2j7/0Pmw8CCRtDdwHJVZaik/PisirhnI\nvmfMbaaV8QPZxYgx+QVYuMi1gJFTi5bFzRwwDerq6od6KGY95nAdJBFxHXDdUPS9yqRaWksrD0XX\nw87EyTWuRTayarFsqAdg1is+52pmZlYwh6uZmVnBHK5mZmYFc7iamZkVzOFqZmZWMIermZlZwRyu\nZmZmBXO4mpmZFczhamZmVjCHq5mZWcEcrmZmZgVzuJqZmRXM4dpLkt4taVoftvs/SRMGYkxmZja8\nOFx771PA2n3YzjfONTMbI0bdLeckTQf2BWqBNwKXA/tHxLb5+SuA7wEXATOBtwOPAy8AOwItwF7A\nycB6wGrAm4FjgZeAPYAtJD0SEf/qQf9nRMTVpPu3Imkq8H3SG5spwFERMVvSE8AsYGPg+TzmDgNZ\n0m3AA8C03M8BETGvjyUzM7OCjbpwzSZGxG6SVgPmAvMkbUwK0LdExD2SaoHLcrA9BhwTEafk4Jqa\n22mJiL0k7QYclx/fAFzeUbB20v8cSddWPTc1t/WIpAOBQ4DZwPrAzhHxrKRZwNZ57J2ZExHHSvpf\n4EDg252tuGRRM620dNHU2DGuvJQli1wLGDm1aFncDNQM9TDMemW0husdABHxoqT/AOeTQuyfwGVV\n692fvy4AHqt6XNPu+Xn07re7ff8NVA4LPwOcKmkxUAc05eXzI+LZXvRXPbbVu1rx4G1qezH0scAv\n1B
UjoRY11NXVUSqVBrSXhgb/nrRxLfpvtIbrVgCSVicdNv0tcALQCBxQtV5350E7en4FMK4X/dcB\nL5IPCwNnAwdFREg6HVi3m7Z6M7YO1dfXM39+cx+7GV0aGmpdi2wk1aKxceGAtj+SajHQXIuK/rzJ\nGK0XNK0p6RbgOtI5zSWk86svRsSCvE51OHX0uLPwmgN8Q5J60f+KqvYuA66SdAewEbBWN+PpiC+O\nMjMbxkrl8uh6nc4XFCkiTmq3/Fzgqoi4fSj6H2JlvxNN/K68wrWocC0qXIuKhobaPp+LGK2HhV9F\n0o2kc5q3F9jmj4BNqMwiS/nxrwpq/03AjA7avyMivtqbtpqamnj5Zf+yAEyYsMK1yCZMWEG5XBrw\nc5lmY9Gom7naa51z87/KrYwf6mEMC5Mn1bBwBFwhOxjGsYy91oe6uvqhHsqQ82ytwrWo8MzVurTK\npFpaSysP9TCGhYmTa1yLbFx5KeAXUbOBMFovaDIzMxsyDlczM7OCOVzNzMwK5nA1MzMrmMPVzMys\nYA5XMzOzgjlczczMCuZwNTMzK5jD1czMrGAOVzMzs4I5XM3MzArmcDUzMyuYw7WXJL1b0rQ+bPd/\nkiYMxJjMzGx4cbj23qeAtfuwne/tZ2Y2Roy6W85Jmg7sC9QCbwQuB/aPiG3z81cA3wMuAmYCbwce\nB14AdgRagL2Ak4H1gNWANwPHAi8BewBbSHokIv7Vg/7PiIirSTc7R9JU4PukNzZTgKMiYrakJ4BZ\nwMbA83nMrwlkSXXAfcBGEVGW9E3gLxFxVX/qZmZmxRl14ZpNjIjdJK0GzAXmSdqYFKBviYh7JNUC\nl+Vgeww4JiJOkXQbMDW30xIRe0naDTguP74BuLyjYO2k/zmSrq16bmpu6xFJBwKHALOB9YGdI+JZ\nSbOArfPYXyUiXpZ0J/A+STcBe5LeCHRqyaJmWvENwiHdw3SJb5YOpJulm9nAGK3hegdARLwo6T/A\n+aQQ+ydwWdV69+evC4DHqh7XtHt+XtWyvvTfQOWw8DPAqZIWA3VAU14+PyKe7WF/FwKfB8YBN0fE\n8q4Gc/A2tb0Y+ljQm//K0ayGuro6SqXSUA9kWGho8O9JG9ei/0ZruG4FIGl10uHZ3wInAI3AAVXr\ndXcetKPnV5BCraf91wEvkg8LA2cDB0VESDodWLebtl4jIu6SdBbp/G+Xs1aA+vp65s9v7m03o1JD\nQ61rkbkWFa5FhWtR0Z83GaP1gqY1Jd0CXEc6p7mEdH71xYhYkNepDs6OHncWvHOAb0hSL/pfUdXe\nZcBVku4ANgLW6mY8nfkFsEZEPNbtmmZmNqhK5fLouog1X1CkiDip3fJzgasi4vah6H8A+jkBaIyI\nn3W3blNTU7mx0e9EAaZMqWWk1qK2tthDuJ6hVLgWFa5FRUNDbZ9/4UbrYeFXkXQj6Zzm7QW2+SNg\nEyqzzFJ+/KuC2n8TMKOD9u8A3gKsCezdk7ZmzG2mlfFFDGvEm/wCLFw08mrRsriZA6ZBXV39UA/F\nzHpg1M1c7bUuvLup3FpaeaiHMSzUTq6heeHIu1p4ycIm9l5/WaHh6hlKhWtR4VpU9GfmOlrPuZqZ\nmQ0Zh6uZmVnBHK5mZmYFc7iamZkVzOFqZmZWMIermZlZwRyuZmZmBXO4mpmZFczhamZmVjCHq5mZ\nWcEcrmZmZgVzuJqZmRVsTNwVp78k7QvMjojnO3m+jnSf1jpgPHB8RMyuev4kYNOIOLCH/b0b+E9E\nPCzpuYhYs987YWZmg8Yz1575Aik4O3MccEtE7AwcAvyo7QlJewJ70bMboLf5FB3fRN3MzEaAbmeu\nkmqBC4F60gv+ecB9wA9J9xh9Bvg4sDnwg+plEbG0kzaPBz4KLANmRsSJkk4DNgZWA14PfC4i7pZ0\nAHAssByYFREn5XXXy+u+GTg2Im7upK+dgP8HrABWB34aEedJ2hE4
LY93MnAQMA+4khSkE/N2E/K+\nzZC0Q0Qs76Cb7wNt+zoeWJL73hA4DDgVOLRqPCfm9dcBLgB2Bd4OnAXcA+wBbCHpMaBG0mXAukAj\n8GHgncB3gVeAxcCHI2JRR/tvZmaDryeHhTcELo+I30lak3Sz7oXAxyLiCUmHkG4afj7w0aplbwMe\naN+YpGnkgIiIFZKukvT+/PSiiHiPpE2AX0raBTgd2CoiWiTNkLRbXrclIvbK3x8PdBiu2VqkgHwd\n8JCkK4GppDcAz0s6ETgAuAZ4IyncVgc2iog/SrofOKKTYCUiXs77tgbwc+DzkiYB5wL/k/uqtjaw\nGbA1KczXB94E/DYiLpB0A/DLiJgnaTJwYn58K7AFsA/ppuxnAR8E3gB0Gq5LFjXTysi7h+lAGFde\nypJFI68WLYubgZqhHoaZ9VBPwvUF4BhJHwKaSTOzNSLiCYCIuARA0urtl3ViY9L5yxX5+1mk8CkD\nt+btH5W0OrAB0AD8UVLbDHP9vN39+es8oLs7gd+dg3G5pIdzu88A50hqJs0gZ+V+fwJcQarN2Xn7\nUv7XKUmbAr8knW+dJWk/UkD/ihR+a0r6EjAHeDi/sVgAPBURrZL+w6tfPdv6eyki5uXHL5Bm1GeS\nZtV/Av4FzKYLB29T29XTY9BIDKka6urqKJX6fO/mDjU0+GejjWtR4Vr0X0/C9XhSOF0gaWfg/cCz\nkjaMiL/lwHgiL9sgIp7KyyIirumgvceB4yStRArUHYFLSTPLrUgz1mmk8Ps/4J/A7jmAppNCdT96\ndy5yixzOq5Bm2U8C1wLrR8QiST8DSrnf2oj4QJ6F3gX8kXRIudPz03mmfSXwkYh4iLTzVwNX5+d3\nIs18v50fV4+9o1fLLvsDPgFcEhFflPQV4HDga52tXF9fz/z5zV00N3Y0NNSO2Fo0Ni4stL2RXIui\nuRYVrkVFf95k9OSCpuuAz0q6DTiGdJ70KODivGxz4A/AkcAlVcv+2FFjEfEwKYjuJs24nq4K4S0k\n3QL8BDg0Il4incedKWk26XDtE33Yz/HA9aRD2l+LiH+TDt/OknQnaUa8Vm57Z0l35DGekre/m3TO\n9fWdtH8mafZ8lqTbJF3di7F19CZhDvBNSRu3e77t8VzgolyrXYAZvejPzMwGWKlcHh4Xo+aLlJ6L\niJ8U3G7brPGgItsdSZqamsqNjX4nCjBlSi0jqRa1tcUfCm7jGUqFa1HhWlQ0NNT2+ZdvwP7OVdJh\npCtw29K7lB+fGBFzOtikXykv6RTSVbft+7u0P+226+M3pPOnbUrAgojYr6g+BsKMuc20Mn6ohzEs\nTH4BFi4aGbVoWdzMAdOgrq5+qIdiZr00bGauNnAuvLup3Frq7pqvsaF2cg3NC0fG1cJLFjax9/rL\nBixcPUOpcC0qXIuK/sxc/SESZmZmBXO4mpmZFczhamZmVjCHq5mZWcEcrmZmZgVzuJqZmRXM4Wpm\nZlYwh6uZmVnBHK5mZmYFc7iamZkVzOFqZmZWMIermZlZwQYsXCXtK2kNSetK+vMAtP99SesU3e5g\nkjRN0ru7eH4nSZcP5pjMzKz/BuyWc8AXgEeBpfTzdnIdiYjjim5zCOwPPA/c2cU6vm2RmdkI0+dw\nlTQd2BtYBVgDOBvYB5gKfBHYHJgB/A+wmqTfAmsBf42IwyV9CPgS8ArwbER8rIu+vg7sDIwDfhMR\n35F0G3AEcCCwHrAa8Gbg2Ii4WdIHgFNzE/dFxJH5xun/CywHniLdRL21kz5vA14k3b/1A8B5wIak\n2f4pEXFHD/s4Evg4sBcwEVgf+BZwC/BJYKmkeyPiL53s/lslXQ+8ETg/Ii6WtC3wA9L9ZJ8BPh4R\nSzurn5mZDa7+zlwnR8Qekj4KHBMR2+VwOQa4nxQsrwC1pCBpBp6UNAX4GPDtiPitpE9IqouIlzvp\n50BSuD4PTM/Lqmd0LRGxl6Td
gOMk3QqcA7wjIl6SdIKkNwM/Ad4VEY2SzshjuqiL/ftFRFwr6Uhg\nfkQcKmlVYKakzXrRxzKgLiL2lLQhcF1EzJD0M+C5LoIV0v/RB4DxwAOSrgXOBz4aEU9IOgR4G/BA\nZw0sWdRMKyPjHqYDbVx5KUsWjYxatCxuBmqGehhm1gf9Ddf789cFwGNVj9u/IjzdFpySXiTN4I4D\nTpT0ubzt77ro5xOk2d7qwPV5WfVNbNvGMS/3PQX4T0S8BBAR35XUAKwJXCkJ0oz75m7274n8dVNg\nhzxjLJFm0Gt000cpj+Vm0gy2LfzmAb25c/nsPLtulfQo8BZg9Yh4Ivd7SXcNHLxNbS+6GwtGSmDV\nUFdXR6nU5/s1d6uhwT8bbVyLCtei//obrl2dD1xBCqH22l4pDgdOyzO884H9gJ+3X1nSBOCAiDgw\nf/+opCu6GceLQL2k10fEAklnAZeRgm2fiGiWtDdpJt2VFfnr48C8iPimpBrgJODZXvSxbrsxlqra\n76hG1baUtBLpzcDbgL8Bz0raICKekvQlICLims4aqK+vZ/787nZ1bGhoqB1RtWhsXDhgbY+0Wgwk\n16LCtajoz5uMgbpauAzcDVwKrMqrg6Xt8VzgD5JuIc1If99RQxHxCvBvSbPzedAbImJeVTuvCfiI\nKANHA3+UNDMvu4d0uPqPku4CjgIe7mYf2lwAvE3S7cBdwD9yH5/pYx9tbd8LHJ0PpXdmCWm2fivp\nzcgC0uH2S3I9Ngf+2MX2ZmY2yErlsi9GHQPKfiea+F15hWtR4VpUuBYVDQ21fT4nM5B/itMrkrYG\nvk1lVlfKj38VERcMUJ9vIl3R3L7POyLiqwPRZyfjOAXYtYNxHBIR/+hv+01NTbz8sn9ZACZMWDEs\nalFbO7DnUs1saHnmOgacc/O/yq2MH+phDAuTJ9WwcIivFm5Z3MwB02qoq6sf0nF4hlLhWlS4FhWj\nYuZqA2eVSbW0lnpzgfLoNXFyzTCpxbKhHoCZDSB/trCZmVnBHK5mZmYFc7iamZkVzOFqZmZWMIer\nmZlZwRyuZmZmBXO4mpmZFczhamZmVjCHq5mZWcEcrmZmZgVzuJqZmRXM4WpmZlYwh2s7kqZL+sZQ\nj6OnJO0raY2hHoeZmVU4XDs2ku7D9wWgbqgHYWZmFb7lXCckHQd8jHRvsJkRcaKktYEfAysDawIn\nR8S1kh4E7gDeDqwA9omIDm+IKGkq8H3SG5spwFERMVvSk8BdwFuBW4F6YBvg8YiYLukSYCnwFmAN\n4JPAWsDmwAxJO0TE8o76XLKomVaG9h6mw8W48lKWDIP7uULNkI7BzAaWw7VjbwV2Ad4ZESskXSVp\nL6AF+G5EzJS0HXA6cC1p5viLiPi8pMuAPYErO2l7KnBcRDwi6UDgEGA2KTR3Bl4A/g1sHREh6SlJ\nbTPTv0fEkZIOBQ6PiM9IeiA/7jBYAQ7eprYfpRiNhjrYaqirq6NU6vN9mAvT0OCfjTauRYVr0X8O\n145tDlwXESvy97NIofgH4GRJn87Lx1dt80D+Oo+uX72fAU6VtJgUyk15eWNEPAMgaWFERF7eVNXe\n/VV9bF/VZpev0vX19cyf3+FEesxpaKgdFrVobFw41EMYNrUYDlyLCteioj9vMnzOtWMPANtKGiep\nBOwIPAF8Dbg0IqYDt/HqUOvpedqzgVMj4hDgIToOxlInjzvqYwX+fzQzG1b8otyxJ0iHde8iHbJ9\nOiKuAX4NfE/S7cDuwBvz+tWh113I/hy4StIdwEak86ZdtVHuYFm1u0nnXF/fTb9mZjZISuXySLow\n1vqiqamp3NjowzwAU6bUMti1qK0dHudX2/PhvwrXosK1qGhoqO3zL67PuQ4ASeOBm3jtbDMi4qjB\nHs+Muc20vur08Ng1+QVYuGjwatGyuJkDpkFdXf2g9WlmQ8/hOgAiYhnpauNhYZVJtbSWVh7qYQ
wL\nEyfXDEEtlg1yf2Y21HzO1czMrGAOVzMzs4I5XM3MzArmcDUzMyuYw9XMzKxgDlczM7OCOVzNzMwK\n5nA1MzMrmMPVzMysYA5XMzOzgjlczczMCuZwNTMzK9iwD1dJX5b0joLaOk3S4R0sf66I9ns4hjdI\nOjA/vkTSewerbzMzGxzD/q44EfGtQehmMG9quxnwQeDyQezTzMwGUb/DVdJ0YG9gFWAN4GxgH2Aq\n8EVgMnAM0AI8CRweEa2dtPUZ4GCgFbgnIo6RdAkpiNYE9gImAusD34qIGV2M63jgo6T7fc2MiBOr\nnlsJ+AmwCfA00OU9yCT9A3g0//tB3rYGWAIcAewHvCEizpA0AXgQ2DQilnfQ3EnA2yUd2lWfud/N\ngXOA5aT6HQaMA34NPAusA9wQESd315aZmQ2eomaukyNiD0kfBY6JiO0k7QQcD2wMbB4RiyV9nxRG\n53XSznTgMxFxr6QjJI1r93xdROwpaUPgOqDDcJU0Dfgw8M6IWCHpKknvr1plP2DliNhe0puA/bvZ\nv7WBzSJigaQrgLMi4kZJuwLfBI4G7gLOIM1Kr+skWAG+DhwRERdKelc3/f4U+FREPCTpg6RgPwFY\nF9gdaAZmSdo8Ih7orJEli5pppaWbrsaGceWlLFk0eLVoWdxMeh9mZmNJUeF6f/66AHis6vFE4JGI\nWJyXzSSFQmc+BZwg6S3An4FSu+fbAmQeXc82NwZmR8SK/P0s0ky67fDvW4G5ABExT9K8LtoCaIyI\nBfnxpsBJkr6cx7csIpok3SdpB+CTwHHdtNdTa0bEQ/nxTOAbeR8ejIgmAElzAFGpzWscvE1tQcMZ\nLQYz7Gqoq6ujVGr/ozw8NDT4Z6ONa1HhWvRfUeHa2TnLMrCJpIk5YHcCnuiincNIs7pXJN0AbNdF\nP129Wj0OHJcP/5aBHYFLgc3z848ABwLnSFqLdHi1K9X9PgZ8NyJmS1JuG+BC0uHvmojoah9X0PML\nyZ6VtGkO2J1JtSuRalpDOuS9LXBxV43U19czf35zD7sc3Roaage9Fo2NCwe1v54ailoMV65FhWtR\n0Z83GQN9tfAy4DTgNkl3A28EftzF+g+RDnP+CXgemEPXwd2hiHgYuBK4G5gNPB0R11Q9fy3wkqQ/\nkw61vtjNflT39UXgdEm3kwL7r7nNmaTZ8SXdtPUUsKmkz3e1D9lhwLmSZgKfA47Ny18hnXf9M3B1\n1ezWzMyGgVK5PJgXylp/SVoXuDwitu/pNk1NTeXGRr8TBZgypZbBrEVt7fA+JOwZSuJaVLgWFQ0N\ntX3+5R2x2P/JAAAPiUlEQVT0P8XJFxDNoDJrK+XHd0TEV3vZ1mHAQR20dWJEzOllW3uTzpW2b+us\n6llvL9r7DfCGqkUlYEFE7NduvcLq0ZkZc5tpZXwRTY14k1+AhYsGpxYti5s5YBrU1dUPSn9mNnx4\n5joGXHh3U7m11OVfG40ZtZNraF44OFcLL1nYxN7rLxu24eoZSoVrUeFaVPRn5jrsP6HJzMxspHG4\nmpmZFczhamZmVjCHq5mZWcEcrmZmZgVzuJqZmRXM4WpmZlYwh6uZmVnBHK5mZmYFc7iamZkVzOFq\nZmZWMIermZlZwUZ1uEp6n6RDC2hnJ0mX92G7o3u43vclHV71/bGSZkv6s6RTetuvmZkNrUG/5dxg\niogbC2yuL7cPOhn4UWdPSppCut3cRsDjedl6wIERsU3+fpakq/MN4M3MbAQY1eEqaTqwB7AuMA/Y\nAJgbEZ+RtAbwS1JoPgpMi4hdetDm0cCHgIlAI7AfsB5wCbCMdDTgIGA6sKqkcyPis500Nxk4Ddiz\natm8POY2rwNaJJ0GbAhMAd5ICu39ScE8PSLmdjd2MzMbHKM6XKtsBOwGtABPSVoNOAX4RURcJOlA\nYGoP23pjRLwHQNINwNbAFsAc4EvAjkB9RJwp6bNdBCsR8X
fg75L2qlq2HPh3bv87wP0R8TdJAIsj\nYk9JXwb2jIgPSvok8DGg03BdsqiZVgbnHqbD3bjyUpYsGpxatCxuBmoGpS8zG17GSrj+LSIWA0h6\njvSKtx5wfn7+NuCwHrb1Sj7/ughYGxgPXAR8GbgRWACclNft0412Ja0MXAw0AZ+peuq+/HUBabYN\n8B+6eQU/eJvavgxjFBuswKuhrq6OUqnP91secA0N/tlo41pUuBb9N1bCtfp8adsr3YPAu4GHSLPP\nbknaFNg3It4paRXg3tzePsCdEXGGpI+RgvbT9DFcgWuBWyLiO13sR4/V19czf35zH4cyujQ01A5q\nLRobFw5aX7012LUYzlyLCteioj9vMsZCuLYPpLbvvwX8TNKHSLO/nngSWCjpTlJwPgusRTokfKmk\nV0jnXI/J6z8iaUZEHNzTwUralxT64/Ph4jJwYgf7YWZmw1SpXPZrttIJzR9HxK5DPZYBUvY70cTv\nyitciwrXosK1qGhoqO3zOZ2xMHPtMUknA++hMkss5ceHRMQ/+tjmm0h/btO+zTsi4qv9G3HPNDU1\n8fLL/mUBmDBhxYDWorZ2eJ9jNbPB4ZnrGHDOzf8qtzJ+qIcxLEyeVMPCAbpauGVxMwdMq6Gurn5A\n2i+aZygVrkWFa1Hhmat1aZVJtbSWVh7qYQwLEyfXDHAtlg1g22Y2Uozqjz80MzMbCg5XMzOzgjlc\nzczMCuZwNTMzK5jD1czMrGAOVzMzs4I5XM3MzArmcDUzMyuYw9XMzKxgDlczM7OCOVzNzMwK5nA1\nMzMrmD+4fwSRtDLwOPABYNWIuFPSL4GDI2L50I7OzMzaOFxHlrbbH30YeB64MyIOGsLxmJlZBxyu\nw5ykScAvgNcDTwFrA9OBVyTdB1wJKCJe6ayNJYuaaWVg7mE60owrL2XJAN7PFWoGpG0zG1kcrsPf\nkcBDEXGKpG2AXYGfAc9HxD2Sur3b/cHb1A7wEEeagQrAGurq6iiV+nx/5UHX0OCfjTauRYVr0X8O\n1+HvrcDvASJirqT2d+Pu9pW8vr6e+fObB2JsI05DQ+2A1qKxceGAtV20ga7FSOJaVLgWFf15k+Gr\nhYe/R4HtASRtAYwHVuD/OzOzYcsz1+HvfGCGpJmkK4VbgHuB70h6HOj2sLCZmQ0uh+swFxFLgY92\n8NT1+ev63bXR1NTEyy/7MA/AhAkrCq1Fbe3IOsdqZoPD4ToGzJjbTCvjh3oYw8LkF2DhomJq0bK4\nmQOmQV1dfSHtmdno4XAdA1aZVEtraeWhHsawMHFyTcG1aH99mZmZL4oxMzMrnMPVzMysYA5XMzOz\ngjlczczMCuZwNTMzK5jD1czMrGAOVzMzs4I5XM3MzArmcDUzMyuYw9XMzKxgDlczM7OCOVzNzMwK\n5nBtR9LKkj7dxfPvljSti+enS/rGwIzuv30cPVh9mZlZ7zlcX2tN4NAunv8UsHY3bQz0DcxPHsS+\nzMysl3zLudc6CXibpFOAbYA6YBxwCtAE7AFsIekRYB/gQ8BEoBHYr7vGJU0BLgVenxdNBz4OPBcR\nP5Ek4PyI2KWT7U8C3iDpXOCePu+lmZkNGIfra30d2BSoBW6KiHMkrQXMioj1Jd0AXA48A6waEe8B\nyMu37kH7JwPX5CB9ZyfbdDobjYgzJX02Ij4raXpPdmjJomZaaenJqqPeuPJSliwqphYti5uBmkLa\nMrPRxeHaubcBvwCIiGclNUlqaHsyIsqSlkm6HFhEOlQ8vgftCrgotzEbmC3ptKrnS0XtQJvP7b5O\n4W2ObPUFtbN6Qe0MnYaG2qEewrDhWlS4Fv3ncH2tFaRz0Y8BOwIPSlobeAPwUn5+nKRNgX0j4p2S\nVgHupWfB+CjpcPNDknYE9gL+DayVn9+yB204LM3MhjGH62u9SJqB1gMbSvow6djfYRGxQtIc4BvA\ngcBCSXeSwu5ZKgHZlW
8AF0v6BCmo265MvjKH7b09aONRSTOAW3qxX2ZmNkhK5bIvNjUzMyuSZ64D\nRNJvSIeS25SABRHR7RXFeftTgF2pXNxUyo8PiYh/FDlWMzMrlmeuZmZmBfOHSJiZmRXMh4VHMUkl\n4DxgM6AFODQinh7aUQ0eSa8DLgbeAkwg/Q3zo8DPSBeTPRwRRw/V+IaCpNWAvwC7Aa2M0VpI+grw\nQdLFi+cBMxmDtci/I5eSfkeWA4cxBn8uJG0LfDMidpG0AR3sv6TDgMOBZcDXI+IPXbXpmevoti+w\nckRsD5wIfH+IxzPYPgE0RsSOpE/WOpdUg5MiYidgJUn7DOUAB1N+IT0fWJwXjclaSNoJ2C7/XuwM\nvJkxWgvSnwKOi4h3AV8DzmSM1ULSF4GfAivnRa/Zf0mrA58DtiO9lnxDUpefa+BwHd12AG4AiIg5\nwDuGdjiD7krSx1ZC+gjL5cCWEXFnXnY9aQY3VnwX+DHpz8ZKjN1avA94WNLvgGuB3zN2a/EE8Lp8\nlKueNCsba7X4G6/+6Nqt2u3/7qTPJpgVEcsj4mXgSeDtXTXqcB3d6kifh9xmuaQx838eEYsjYpGk\nWuDXwP/j1R/A0UxxH9c0rEn6JPBiRNxMpQbVPwtjphbAFGAr4MPAUaRPYhurtVgIrAc8DlwAnM0Y\n+x2JiKtJb7zbtN//OtLH4Va/li6km7qMmRfaMepl0g9Fm5UiYsVQDWYoSHoTcCtwaURcQTqP0qYW\nWDAkAxt8hwC7S7qNdA5+BtBQ9fxYqsVLwI15FvIE6XqE6hfKsVSLY4EbIkJUfi4mVD0/lmrRpqPX\niJdJIdt+eaccrqPbXaRzKuSbBDw0tMMZXPk8yY3AlyLi0rz4/vxJWAB7And2uPEoExE7RcQu+W5L\nDwD/A1w/FmsBzCKdNyPflGMS8Kd8LhbGVi3+TWVGtoB0kev9Y7QWbe7r4PfiHmAHSRMk1QMbAw93\n1YivFh7dribNVu7K3x8ylIMZAieSbu13iqRTSR/C8QXgnHwxwmPAVUM4vqF2AvDTsVaLiPiDpHdL\nmks6BHgU8HfgwrFWC+CHpI9jnUm6cvorpI9gHYu1aPOa34t8o5azSW/MSqQLnl7pqhF/iISZmVnB\nfFjYzMysYA5XMzOzgjlczczMCuZwNTMzK5jD1czMrGAOVzMzs4L571zNRjBJ65I+H/YR0t/frUT6\n9JgZEXF6N9vdHhHrdbHO1sD+EfEVSXuTPnO10zZ7MNatgCMi4vC+ttHL/v47/sHoz6yaw9Vs5Hsm\nIrZs+0bSmsCTki6PiOhiu+7+yH0TYDWAiLgOuK4/g4yIe0m37Bos/x2/2WBzuJqNPmvlr80Akr4M\nfIQ0q72x/UxO0jTSB7ZPIoXR94CfA2cAkySdSLqTzs7Ab4HDI2LvvO3RwEbAccB3gJ1IdyD6WUSc\n1a6fnYDT8z0zbwPuJ91xpQb4fP63CfCDiDhL0mnAW4ENgFWBn0TEd/MdXH4IvIf0ObCXRcS3c/vf\nzvv5DLBF1fjPBS4C1s71mRkR0/M2J5Fuw/c24K/AQRGxXNKxwBGkD3X/fZ7Br0b6gPt1ct8nRcSf\nev5fY2OFz7majXxrS7pP0mOS5pNCcd+IeFbS+0h3gHkHsCWwjqSD2m3/aeBrEbEtsCtwZkQ0AacC\n10bEN/J6ZdItuLbIn68KcCBwGekm2+WIeAewLbCvpHd1MNbq2XI5It6etz+bdNuvHYHTqtaZCuyS\nx3+EpM2BI4F1ImJa7mt/SXvm9TcCdomIfduN//3A/fm+pW8Ftpe0Rd5mO+AzpHBdF3hfPqR8ZO53\nM2DLvP5ZwEURsTWwD3CBpEkd7KeNcQ5Xs5HvmYjYMiLeRuWuJrfl53Yj3YvyXuA+UtBObbf98cAq\nkr4CfJ00g+1QRCwnzV73l/RmYNWI+Evu54OS7gfmkGaIm3Yz7uvz138AsyNiaUT8k1ff
oebyiFiS\n76F5DWm2uivwszyeJaRbxr2nMsRY2MG4rwBukfQF4BzSTHhyfvrhiHguIsqkz5JdlRTy10XEwoho\njYj3RkTbTPuMvJ/Xk2bpG3SznzYG+bCw2ejyJdJdb04AvkV68f9hRPwQQFId6TBn9e3mfk26Ddt1\nwBXAR7vp4xfA10gh9Mu8bBzp7kO/y/28kXTPy65Uf/D58k7WqV4+jnQz71K7dUpUXsuWdNSIpM8B\nHyId0r0ZmFbVTkvVquW8fFm77dckHTpeCdg1IhZULX++k7HbGOaZq9nI99+wiYhWUrD+v3x+8Fbg\nfyRNkvQ60uzvw+223w04NV+0tDNAPq+5nA7egEfEHNJ5y0+QDumS+zlc0uskTSbdPWTb/u4PsJ+k\n8ZLeAHwAuIk0K58uaSVJE4GPU5mpV6se/27ABXkGWwI2J4V1Z+4E9pQ0MdftctKs/1bgaABJm5DO\n0U7s227aaOZwNRv5XnXVb0TcCPwZ+N+I+D3pMO4cUhDcFxEz2m1/OnCXpL8Au5Nuv7YeMBd4p6Qz\n2/cB/ApYGBF/z9+fT/qToPvzdhdFxMyejrmL55aQgvou0rngx0mzz2eAB0mHu38XEdd00E71+H8A\nnJ738dzcXkd/hlQGyIeAzwVm5326PSJuJV109U5JD5IC9+MRsaiLfbExyrecM7NhKV8tXI6IM4Z6\nLGa95ZmrmZlZwTxzNTMzK5hnrmZmZgVzuJqZmRXM4WpmZlYwh6uZmVnBHK5mZmYFc7iamZkV7P8D\nIF/KRHBPP4MAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1178c47d0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Top Ten\n",
    "# Importances are rescaled so that the most important feature scores 100.\n",
    "feature_importance = est.feature_importances_\n",
    "feature_importance = 100.0 * (feature_importance / feature_importance.max())\n",
    "\n",
    "# argsort is ascending, so the last ten indices are the ten largest importances\n",
    "# (fewer if the model has fewer than ten features).\n",
    "indices = np.argsort(feature_importance)[-10:]\n",
    "\n",
    "# One tick per bar: the same positions are used for bars and labels, and the\n",
    "# bars are explicitly centred on them so the label count always matches.\n",
    "positions = np.arange(len(indices))\n",
    "plt.barh(positions, feature_importance[indices], align='center',\n",
    "         color='dodgerblue', alpha=.4)\n",
    "plt.yticks(positions, np.array(X.columns)[indices])\n",
    "plt.xlabel('Relative importance')\n",
    "plt.title('Top Ten Important Variables');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Other Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import xgboost as xgb\n",
    "from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.973252109372\n",
      "0.0\n",
      "[ 0.          0.30769232  0.          0.          0.          0.          0.\n",
      "  0.          0.          0.          0.          0.          0.          0.\n",
      "  0.          0.          0.          0.          0.          0.          0.\n",
      "  0.          0.          0.          0.          0.          0.          0.\n",
      "  0.          0.          0.          0.          0.          0.05128205\n",
      "  0.          0.          0.          0.          0.          0.          0.\n",
      "  0.          0.          0.          0.          0.          0.          0.\n",
      "  0.          0.          0.          0.          0.          0.          0.\n",
      "  0.          0.          0.          0.          0.          0.          0.\n",
      "  0.          0.          0.          0.          0.          0.          0.\n",
      "  0.          0.          0.          0.          0.          0.          0.\n",
      "  0.05128205  0.30769232  0.2820513   0.          0.          0.          0.\n",
      "  0.        ]\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeUAAAEZCAYAAACpYD0AAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xl4XWW5/vFvLIUypBElIIMi0+9mKJMICjILgigyKCoc\nBVFk0OMAqEdQhgMK6lEU4aAgiCAIKiICIvNYkCKCKNONokeZadGGQluhbX5/rDfs3TRjkzarzf25\nrl7ZWcP7vutJs5/1vGtlr5bOzk4iIiJi5L1qpAcQERERlSTliIiImkhSjoiIqIkk5YiIiJpIUo6I\niKiJJOWIiIiaWGKkBxARFUmnAtuWb9cH/grMBDqBLW3/e4jtXwq8EWgBNgb+BMwBJtt+x1DaLu0v\nBcwAlrM9fajtDbLvw4CZts+dz/1fBVwH7N48dkltwJPAVrbv67bPtcCltr8/iH6uBQ6x/bc+trkI\nuM32Gd2Wj1h8Y+FJUo6oCduf6Xot6a/AfrbvHcb29y5tjwFeAra2PW242i9G6oMPtgVuG8L+SwI7\ndF9ou0PShcBHgeafz5rAm4E9B9PJMJz85IMlFnNJyhH11FL+vULSdsDXgaWokuqXbV8n6WPAe4Gx\nwCrAY8D+tp8dZPvbAicB44BZwLG2r5V0CLATVeJaE5gOfMj2n3trXJKAy4BJwOalry8AhwHrArfb\n3r9sdwVwFzCBqnI/zPYkSUsCpwLbALOBO4AjbM+Q9BRwE1XF/3VgF+BtkmYCVwNnAsuXeDwK7GN7\natnvDOAdwGrAj20fC/ywDP0OSTvZntJ0ON8DrpX0Odsvl2UHlX2nS1qln/66xvm50s/bAZdj2wwY\nX477QNt3l/Z3lHQAsCxwFfBfPcT40DKOFuBZ4FO2/yJpR+BrTZueYPvKXn5UUTO5phyxCJC0AvBT\nqoS1KfAx4CeSViubbAUcbHsDqmnpUwfZfjtVYnm/7TcD+wA/lLRS2WQ74CDbGwL3AUcMpFmqxLUB\ncCfwP1QnDxsC75S0SdlubeAS25sAJ5TjBDgRaC3bb1Jef7Wp/bttb2D7fOAa4Gu2fwj8B3C97a2B\ntaiS1r5N+421vU05pv+S9DrgQBqXCZoTMmW24hFgrxKrJYCPAKeXTfrr7/dlnL+hUeluDbTZ3rLE\n5xLg8037vI7qZGQz4G3A/nMFVnpHieVWtjejOtHoitsJwFdsb0F1EjTPDEDUV5JyxKJhS+Chruls\n2/dTJbrtyvqrm65T/oCqEhyMrYGVgSsl3QtcTlUtTyjrJ9meXF7fA7xmAG1Os31Def0oMNH2zHI9\n9OmmNp60fVk5rsuApSWtD+wKfM92p+1OqiT4zqb2J/bUqe3/Af4g6QiqZCVguaZNflW2+wfwL6oK\nt8tcswdNvkd1IgSwB3B/10zBAPprnlZvKfvcApwk6TBJ3yptNu9znu2Xyn0EFwI7dxvPu6hmHO4s\nP68TgZUkLQ1cDJwj6XyqexOO7eWYooaSlCMWDT39ro6hmrKGKoE2L5/dT3vdr02OAe61/Sbbm5Zq\nfCvglrJ+Rrd9e0tezbrfmPZyj1vNPfauscxi3mNuPl6AF3pqrNwwdxTwFFUyvanbeHs7lr6O6WfA\nRpLeQDVl3FUlD6S/ecYpaW+q6f3ZwC+As7vt0/zza2He2I0Bzu76eQGbAlvYnmH7dKrp8huB3YE/\nlmQdi4Ak5YhFw2+BDSS9CUDShlRJ8+ayfuemqeaDqa7T9qV7Arod2FjSW0r7m1NN2a4wyHG29PK6\nL28o18uRtA8wxfYjVFPSh0l6Vbk57TDg2l7amEUjYb8D+Jbti4CpVNdwx/QzhllUCXpsTyttvwSc\nCxwJrMPc8Z2f/nammrI/C/gDVaXcvM9+kpaQtAzV1PVVZXlXTK8BPlQuOwAc3rWNpLuBdW3/iOr/\nwopU161jEZCkHFFPc1Wy5aatDwDfl3
QfcB7VzVb/VzZ5nOoa84NUNxv1d823e/tPAe8HTpP0B6op\n8H1sPz2Ecfd1p3DzuueBQ8txfQbYuyw/DngR+CPwAFXl/fke9ocqIX1B0mfLfmdI+h1wEVW1v3Yv\n+3UC2J4NXAn8TtLa9OxM4FDgrDKd3mXQ/QH/C7yrTD3fCjxMdT26y2NUJ2K/A35t++fdxnsFVbV+\nY4nb7sD7yjZHAt+UdA/Vn3l9wfYzvRxT1ExLHt0YsWgrd1+/q+tPnhYl5e7ribbb+904YhRIpRwR\nIy2VQUSRSjkiIqImUilHRETURJJyRERETeRjNqNXnZ2dnS0tA/2rloiIKOb7jTNJOXrV0tLC5MnD\n/byCRVN7e2tiUSQWDYlFQ2LR0N7eOt/7Zvo6IiKiJlIpR686Ojp4/vmc+QIsueScxKJILBoSi4bE\nomEolXKScvTq/LumMbvnTx0cdZZ7Bl54MbGAxKJZYtGQWFRmTp/GF9fqf7veJClHr5ZetpXZLUuN\n9DBqYZnlxiUWRWLRkFg0JBbDI9eUIyIiaiJJOSIioiaSlCMiImoiSTkiIqImkpQjIiJqIkk5IiKi\nJpKUIyIiaiJJOSIioiaSlCMiImoiSTkiIqImkpQjIiJqIkk5IiKiJpKUR5CkXSQd1Mu65SXtOwx9\n7CnpdUNtJyIiFrw8JWoE2b6mj9UbA+8BLhpiN58BHgSeHmI7ERGxgCUpjyBJBwC7AqsDjwFrAZNs\nfxI4GthI0kG2z+5l/79TJdwHgR8Cp1DNfqwAHAa8BtgEOF/S1mXZvsAc4GLbpy/Aw4uIiEFKUq6H\ndYCdgJnAXyX9N/BV4JDeEnKxKrCx7amS3g8cYfuBMu19oO1DJP0BOLj08X7gbUALcJ2ka2z/ubfG\nZ7w4jdnMHJYDXNSN6fw3M15MLCCxaJZYNCQWlZnTpwErzff+Scr18Bfb0wEkPQmMG+B+U2xPLa+f\nAI6VNB0YD3Q0bdcCTKCqyG8o37+aKlH3mpT336J1MMcwCgz0xzIaJBYNiUVDYjHUGCQp10Nn0+uW\n8nUOMGYQ+30X2M+2JR1PlYCb2zFwv+3dACR9FvhjX423tbUxefK0AR3A4q69vTWxKBKLhsSiIbEY\nHrn7euR19vL9o8AESZ8e4L4/Bi6RdAtVBbxKWX4HcB7wD+BGSRMl/Q5Ym6q6joiImmjp7OyeEyIq\nHR0dnVOm5MwXYIUVWkksKolFQ2LRkFg0rLXWai39b9WzTF/XnKTdgSNoVMUt5fWptn+1IPs+/65p\nzGbsguxikbHcM/DCi4kFJBbNEouGxKIyc/o0vrjW/O+fpFxztq8ArhiJvpdetpXZLUuNRNe1s8xy\n4xKLIrFoSCwaEovhkWvKERERNZGkHBERURNJyhERETWRpBwREVETScoRERE1kaQcERFRE0nKERER\nNZGkHBERURNJyhERETWRpBwREVETScoRERE1kaQcERFRE0nKC4GkbSRNmI/9/iZpyQUxpoiIqJ8k\n5YXjo8Cq87FfHnYdETGK5NGNgKQDgD2BVuC1wEXAe22/pay/GPgWcA5wK7AR8DDwDLAtMBPYDfgy\nsAawIvAG4HDgOWBXYFNJD9h+fAD9n2D7l1TPTkbSBsApVCdRKwCH2b5T0iPARGBd4Oky5h4TuaSb\ngD8AE0o/+9h+bD5DFhERC0CScsMytneStCJwF/CYpHWpEu8bbf9OUitwQUmIDwGftX1MSXgblHZm\n2t5N0k7AEeX11cBFPSXkXvqfJOnypnUblLYekLQvcCBwJ7AmsL3tJyVNBDYvY+/NJNuHS/oKsC/w\njb4CMuPFacxmZl+bjBpjOv/NjBcTC0gsmiUWDYlFZeb0acBK871/knLDLQC2n5X0L+D7VMnvH8AF\nTdvdW75OBR5qej2u2/rHmpbNT//tNKavnwCOlTQdGA90lOWTbT85iP6ax9bv/5r9t2gd+OhHhcH8\nOB
d3iUVDYtGQWAw1BknKDZsBSFqJanr3UuBzwBRgn6bt+rvO29P6OcCYQfQ/HniWMn0NfBfYz7Yl\nHQ+s3k9bgxlbr9ra2pg8edp8drV4aW9vTSyKxKIhsWhILIZHbvRqWFnS9cAVVNdsZ1BdP37W9tSy\nTXNS6+l1b0lvEnCyJA2i/zlN7V0AXCLpFmAdYJV+xtOT3DQWEVFzLZ2dea8uN1rJ9tHdlp8OXGL7\n5pHovwY6c+ZbSRXQkFg0JBYNiUVDe3trS/9b9SzT172QdA3VNdubh7HN/wXWp1G1tpTXPx2m9l8P\nnN9D+7fY/u/BttfR0cHzz+eXDGDJJeckFkVi0ZBYNNQ5Fq2t42lpme88uVClUo5enXbd452zGTvS\nw6iF5ZYdxwu5sxRILJolFg11jcXM6dPYZ8I4xo9vW2h9plKOBWLpZVuZ3bLUSA+jFpZZblxiUSQW\nDYlFQ71j8fJID2DAcqNXRERETSQpR0RE1ESSckRERE0kKUdERNREknJERERNJClHRETURJJyRERE\nTSQpR0RE1ESSckRERE0kKUdERNREknJERERNJClHRETURJLyQiJpG0kT5mO/v0lackGMKSIi6iVJ\neeH5KLDqfOyXZ2tGRIwSeXRjIekAYE+gFXgtcBHwXttvKesvBr4FnAPcCmwEPAw8A2wLzAR2A74M\nrAGsCLwBOBx4DtgV2FTSA7YfH0D/J9j+JdBS1m8AnEJ1IrUCcJjtOyU9AkwE1gWeLmOeJ5FLGg/c\nA6xju1PS14C7bV8ylLhFRMTwSVKe2zK2d5K0InAX8JikdakS7xtt/05SK3BBSYgPAZ+1fYykm4AN\nSjszbe8maSfgiPL6auCinhJyL/1PknR507oNSlsPSNoXOBC4E1gT2N72k5ImApuXsc/F9vOSbgN2\nkXQt8E6qE4hezXhxGrOp30PLR8KYzn8zo4YPcB8JiUVDYtFQ11jMnD4NGDfSwxiwJOW53QJg+1lJ\n/wK+T5X8/gFc0LTdveXrVOChptfjuq1/jMH9b+jefzuN6esngGMlTQfGAx1l+WTbTw6wv7OBTwNj\ngOtsz+prMPtv0TqIoY8Gi84v9oKXWDQkFg11jMU4xo8fT0tLy0gPZECSlOe2GYCklaimkS8FPgdM\nAfZp2q6/67w9rZ9DlQwH2v944FnK9DXwXWA/25Z0PLB6P23Nw/btkk6lur7dZ5UM0NbWxuTJ0wbb\nzWKpvb01sSgSi4bEoqHOsZgy5YWF2l97+/wXNLnRa24rS7oeuILqmu0MquvHz9qeWrZpTrg9ve4t\nYU8CTpakQfQ/p6m9C4BLJN0CrAOs0s94enMh8DrbD/W7ZURELFQtnZ25uRdeudFKto/utvx04BLb\nN49E/wugn88BU2z/qL9tOzo6OqdMqeeZ73BobR34lFadq4CFLbFoSCwaEouG9vbW+Z4rz/R1HyRd\nQ3XN9uZhbPN/gfVpVLUt5fVPh6n91wPn99D+LcAbgZWB3QfS1vl3TWM2Y4djWLUzc/o09pkA48e3\njfRQIiJekUo5enX2HR2ds1uWGulhLBAzXuhg9zVfHnBSThXQkFg0JBYNiUXDUCrlXFOOiIioiSTl\niIiImkhSjoiIqIkk5YiIiJpIUo6IiKiJJOWIiIiaSFKOiIioiSTliIiImkhSjoiIqIkk5YiIiJpI\nUo6IiKiJJOWIiIiaGPakLOkASe8ury+SNEnSwZIOGmQ7y0vat7z+L0lvHu6xNvW1lKSP9bPNNpIm\nDENfy0l6WtIy3ZbfI2mtAey/UnmcJJL2kvSIpP+UdMkgxvDU4EceEREL2rA/utH2eU3fvt32ivPZ\n1MbAe4CLbH996CPr08rAQcA5fWzzUeBi4P6hdGT7BUmXA++jesQikt4E/NP2owPY/xngP8u37wYO\nt/1r4PRBDCOPBouIqKE+k7KkXwDfsX2bpM2A/waeBtahek7vl23f
KulPgIGXytengY2ANkm/BC4D\n1rV9lKQvA3sAY4Dv2f6BpJOAzYDXAvfZ/hhwNLBRqbDfBlwE3AicC6xJVeWfYvvnkm4C/gBMAFqB\nfWw/1ssxbQV8q4x1OlVyPBpYr4ztXOB7wFJUyfrLwOPArsCmkh4E7rK9cmnvorL9U2Xfl8vY9rP9\nRC+hPRs4mZKUqRL+WaW9fYDDgVnARNtHSzoO2ApYlurk4VzgJGA3YDNJzwG/tL2ypA2BU0u7z5W2\nXyztrw/8tRxbRETUTH/T1z8APlJeHwj8BphseztgT+CMsm454ATb+5XvO21/EnjO9l5dyyRtAuxi\ne3NgC+D/SWqlqhJ3ATYHtpS0MvBV4EbbZzeN5xDgWdtvA3YGviLptWXdJNs7A9cD+/ZxTHsCPwW2\nB74PLF/6etD2V4B1gW+W8RwCfNL2PcDVwOdLsu+p0twZmATsBBwP9PqgXtt3ActLWlXSksDbgUsl\nLV/23dH2tsBqknYquz1oe2tgBlV8r2ga051NYzoL+ITtHal+Xv8F7AUsZXsr4Chgrqnz3sx4cRoz\nXuhYLP/NnJ7nvkZE/fQ3fX0N8I2SLLahSuJbS3oLVaU8pikpPjKA/gTcBWB7FvB5SUsAK0m6kKqi\nWxYY28v+6wHXlf1fkPQQ0HUd9t7y9TFgpT7GcBLwJeAGqgr4TqqqvctTwJebrjE3j6Wl29fm1+dQ\nJcBrgKlU1XdfzgE+DPwNuNz2LElrA+3AVZJaqE521izbu5d2uj9Mez3gDEldY/8z8AKNuD8mqcdZ\nhO7236J1IJstosYxfvx4WloG/izy9vbFOR6Dk1g0JBYNicXQ9ZmUbXdK+jnV9OwvgSnAP2x/TdI4\nqsTzz7L5nB6a6P6O9zBwKICksUDXtdDX2/6gpBWoqrqW0l73Sv5BYFvgV6XCnkA1HQsDv076IeBc\n25+X9EXgYOBHTX2dCJxl+xpJHwEOaDq+rm2WKDdqzQI2KMv2AG6zfYKkD1Il6L5uHruQKoE/BRxZ\nlv0N+Aews+3Zkg6gOtnYi57j26wr1g8D+9t+vEzVv66Mc1/gNEmrAKv10xYAbW1tTJ68+FaUU6a8\nMOBt29tbF+tYDEZi0ZBYNCQWDUM5ORnI3dfnUiWFc6imRteTdDNwO/B32530nhDnWm77PuAaSXcA\ntwI/ppryXaO0eQnwKLBK+bqhpE83tfMD4LWSbqO6vny87Sl99N+Tu4BzJF0P7EB1XfdZYElJJwM/\nA75VxrMTsELZbxLwNVUl6HfK9z8D/q+svxs4QdINVNPep/U1CNtTqRLouK4bvMqxnALcKulOquvY\nfc1AdPbw+hPAj0uMTgb+aPty4DlJvwW+XY43IiJqpqWzMzfiRs86Ojo6p0xZfM98W1sHPn2dKqAh\nsWhILBoSi4b29taBXxfrZtj/JKouyp3jyzctagGmNt14tqD7Hwtcy7xVvG0ftjDGMFTn3zWN2b1e\n3l+0zZw+jX0mwPjxvd6PFxGx0C22Sdn2e0e4/5eppscXWUsv28rslsX5r6deHukBRETMJR+zGRER\nURNJyhERETWRpBwREVETScoRERE1kaQcERFRE0nKERERNZGkHBERURNJyhERETWRpBwREVETScoR\nERE1kaQcERFRE0nKERERNbHYPpBipEnaE7jT9tO9rB8PXACMB8YCR9q+s2n90cCGtvcdYH/bAP+y\nfb+kp2yvPOSDiIiIhSqV8oLzGaqE25sjgOttbw8cCPxv1wpJ7wR2Y97HPvblo8Aq5XUekh0RsQha\nIJWypFbgbKCNKlGcAdwDfIfqucZPAP8BbAJ8u3mZ7X/30uaRwAeonrd3q+2jJB0HrAusCLwa+JTt\nOyTtAxwOzAIm2j66bLtG2fYNwOG2r+ulr+2ALwFzgJWAH9g+Q9K2wHFlvMsB+wGPAT+jSsDLlP2W\nLMd2vqStbc/qoZtTgK5jHQvM
KH2vDXwcOBY4qGk8R5XtVwPOBHYENgJOBX4H7ApsKukhYJykC4DV\ngSnA+4C3At8EXgKmA++z/WJPxx8RESNjQU1frw1cZPsySSsDtwAvAB+0/YikA4H1ge8DH2hath7w\nh+6NSZpASSy250i6RNK7yuoXbb9d0vrATyTtABwPbGZ7pqTzJe1Utp1pe7fy/ZFAj0m5WIUqsS4B\n/EnSz4ANqE4cnpZ0FLAP8CvgtVRJcSVgHdtXSboXOKSXhIzt58uxvQ74MfBpScsCpwMfLn01WxXY\nGNic6iRgTeD1wKW2z5R0NfAT249JWg44qry+EdgU2AP4KVUSfw+wPNBnUp7x4jRmM7OvTRZZM6dP\nA8aN9DAiIuayoJLyM8BnJe0NTKOqBF9n+xEA2+cCSFqp+7JerEt1fXZO+X4iVdLqBG4s+z8oaSVg\nLaAduEpSV0W7Ztnv3vL1MWCpfo7hjpJQZ0m6v7T7BHCapGlUFevE0u9ZwMVU8fxu2b+l/OuVpA2B\nn1BdT54oaS+qxP5TqqS5sqQvAJOA+8sJyVTgUduzJf2LuTNLV3/P2X6svH6GqoI/iaqKvwF4HLiT\nfuy/RWt/myzCxjF+/HhaWvr8Ec2lvX1xjsfgJBYNiUVDYjF0CyopH0mV1M6UtD3wLuBJSWvb/ktJ\nNI+UZWvZfrQss+1f9dDew8ARkl5FlYi3Bc6jqmQ3o6qQJ1Alzb8B/wB2LonrAKpkvBeDu9a6aUnq\nS1NV9X8GLgfWtP2ipB8BLaXfVtvvLlXv7cBVVFPfvV6zL5X9z4D32/4T1cH/EvhlWb8dVaX9jfK6\neew9ZZI++wM+BJxr+/OSvggcDJzYVwDa2tqYPHlaX5ss0qZMeWHA27a3ty7WsRiMxKIhsWhILBqG\ncnKyoG70ugL4T0k3AZ+lug58GPDDsmwT4NfAocC5Tcuu6qkx2/dTJbA7qCq8vzYl700lXQ+cBRxk\n+zmq69S3SrqTalr5kfk4hrHAb6im3k+0/U+qaeaJkm6jqsBXKW1vL+mWMsZjyv53UF1TfnUv7Z9E\nVa2fKukmSb8cxNh6OrmYBHxN0rrd1ne9vgs4p8RqB+D8QfQXERELQUtn56J7o265eesp22cNc7td\nVep+w9nuIqgzZ76VVAENiUVDYtGQWDS0t7cO/LpYN7X6O2VJH6e6o7nrTKGlvD7K9qQedhnSGYWk\nY6juYu7e33lDabdbH7+guj7cpQWYanuv4eojIiIWD4t0pRwLVkdHR+dLL+VP2SFVQLPEoiGxaEgs\nGoZSKecdNyIioiaSlCMiImoiSTkiIqImkpQjIiJqIkk5IiKiJpKUIyIiaiJJOSIioiaSlCMiImoi\nSTkiIqImkpQjIiJqIkk5IiKiJpKUIyIiaiJJOSIioiZq9ejGLpJuAg4BtgSes33lIPZ9PbCx7Ssl\nnQKcYvvxYRjTK+0Ota0B9PXKc6IlrQB8E3gDMAZ4DDjS9jPluc+H2t63xOxe20eUNpYCHra9hqTr\ny77rAs8CzwHX2T55QR9LREQMXC2Tchfb8/Nc4x2pks+VXQlqmLzS7jC2ORCXAt/oOhmQ9HbgSklb\nlPXNz978oKTLbN/avM72TmXfHwIX27524Qw9IiIGY4EkZUmtwNlAG7AKcAbwAeBhqsRG+X494EvA\nHGAl4Czb32tqp7liPA3YAhgLHAf8GjgTWA1YGbgcOB74IrC0pDuAI6gq7meAC4DxVBXjl23fLOk+\n4BZgozKGPWzP80BQSa9qave3wCnAOrY7JX0N+D3wie7HZ/tZSScBW5d+v237kkHEcTNganN1bvsG\nSX8Btu1hl88AZ0l6EzC7h/Xz/YzPiIhY8BZUpbw2cJHtyyStTJX4Hgdut32YpEOpkvGlVEl7kzKW\nP0qaJ2lJ2hN4re23SGqjSrb3Ab+1/cMyVfu47WNLkpTtKyQdXpr4MnCt7dMkrQJMBNakStIX2v
60\npAuAdwI/696/7TlN7V4uaS9gF0nXAruW9j8BTGw+Pkm/AdawvW0Z452SrrX9/ADjuCbwaA/L/was\nDvy92/L7gPOAbwOfZohJuK2tbSi7L3ba21tHegi1kVg0JBYNicXQLaik/AzwWUl7A9OoqluAG8vX\n3wJ7UE2v3mF7FjBL0gPAWsw9JQugsg+2O4DjSjW+haQdSh9L9jCOrqS0HlWljO0nJXVIWrGs+0P5\n+hgwboDHdzZV0hsDXG97liSAm5qOb0+qE5HNJN1YxrIE8Ebgj90blDQWGGt7elnUCTwBrNFD/+sA\nvU1Bf53qpOOdzBvHQZs8eZ6Jg1Gpvb01sSgSi4bEoiGxaBjKycmCuvv6SKpkuz/wcxrJcbPy9W3A\nA2X5ppJaJC0DrA88wrwV3oNUU9dIapN0NfARqqndD1NNJy9Ttp3DvMf1IGW6V9KqwPJUNzvBwBPX\nHKokjO3bqU4ePgqc07RN8/HdDzwE3Gh7R6pr0j+j58oXqmn2L5TXqwDP2r4DWEnSu7o2krRr6fuW\nnhqxPYcqNt8e4HFFRERNLKikfAXwn+WO4M8ALwNLAR+RdDOwG/DVsu1Y4DdUSeZE2/+kkSi7blS6\nAviXpNvKtqcANwC7lvbOAB4pU+V/AvaQ9P6mdk4GdpR0C9WU+cdtz2buhNxfcv4T8J7SLsCFwOts\nP9S0zVzHV64FvyjpVuBuoNP2i720fxGwvaSJwIrlOAF2B/aTdLuk26kS7rtsdx/vK9/bfoSek/KQ\nK+eIiFhwWjo7F877dNefOZWE0bVsu7Jsv4UyiGEk6XPAFNs/Kt/Pc3yLgc5MR1UyNdeQWDQkFg2J\nRUN7e+t838+zMP8kqvZVWrmuey3zjtW2D2va7lyqO753b9pmQMcn6ePAfk3bt5TXR9meNJ9DXyA6\nOjrI58tERCw8C61SjkVPR0dH50svJSlDqoBmiUVDYtGQWDQMpVLOO25ERERNJClHRETURJJyRERE\nTSQpR0RE1ESSckRERE0kKUdERNREknJERERNJClHRETURJJyRERETSQpR0RE1ESSckRERE0kKUdE\nRNTEqEnKkpaS9LE+1m8jaUIf6w+QdPKCGV2P/T3Vyxje3cc+50p6h6RdJB20YEcYERHDbWE+unGk\nrQwcBJzTy/qPAhcD9/fRxsJ8pNY8fdk+byA72r5m+IcTEREL2mhKykcD60k6BtgCGA+MAY4BOoBd\ngU0lPQDsAewNLANMAfbqr3FJxwFbAcsCHwN2pnpu8hyqZP994CFgI9szJB0JzLJ9ai9NjpN0AbB6\nGcM+wJccYg/bAAAQxUlEQVSAp2yf1c9YDgDWLX3+FHistPNTYAKwCXCV7S/1d1wREbHwjJrpa+Cr\nVEmxFbjW9nbA+4FzbN8DXA18AXgCeI3tt9veEhgLbD7APh60vTVVXD8AvA3YliqprwlcAry3bLsf\ncH4fbS0HHGV7G6CNKpEORlelvQZwILA7cCLwWeCtVCcOERFRI6OpUu6yHnAhgO0nJXVIau9aabtT\n0suSLgJeBFalSswD4fJ1AlVlegPQArwaWJtq6vx7kgw8bPtffbT1nO3HyutnqKr2+fFX2y9Iehl4\n2nYHgKQ5/e3Y1tY2n10untrbW0d6CLWRWDQkFg2JxdCNpqQ8h6qCfYiqer1P0qrA8sBzZf0YSRsC\ne9p+q6Slgd9TJdaB9gFVcr7f9m4Akj4L/NH245JagM8DZwzTcQ1GSy+vezV58rQFNJRFS3t7a2JR\nJBYNiUVDYtEwlJOT0TR9/SxVxdsG7CDpFuBS4OO25wCTgJOBWcALkm4DrgOeBFYZQPuv3Jhl+4/A\njZImSvodVZX8RFl9DrCJ7ZsH2l7T6/5uNOtpfU/tDKStiIhYyFo6O/PeHL3qzJlvJVVAQ2LRkFg0\nJBYN7e2tA51dncdomr4eFpJ+QTXl3aUFmGq73zu0e2hrd+
AIGlVrS3l9qu1f9bLPWOBa5q10bfuw\nwY6hLx0dHYyuyZSIiJGVSjl61dHR0fnSS0nKkCqgWWLRkFg0JBYNQ6mU844bERFRE0nKERERNZGk\nHBERURNJyhERETWRpBwREVETScoRERE1kaQcERFRE0nKERERNZGkHBERURNJyhERETWRpBwREVET\nScoRERE1sVgmZUl7SnqdpNUl/XYBtH+KpNWGu92hkjRB0jZ9rN9Y0pfL66cW3sgiImIgFtdHN34G\neBD4N/M+4nDIbB8x3G0Ok/cCTwO39bTS9n3AfeXbPB4sIqJmFsmkLOkAYHdgaeB1wHeBPYANgM8D\nmwDnAx8GVpR0KbAK8EfbB0vaG/gC8BLwpO0P9tHXV4HtgTHAL2z/j6SbgEOAfYE1gBWBNwCH275O\n0ruBY0sT99g+VNJ2wFeAWcCjwCG2Z/fS503As1TPbd4TOAdYHRgLfAoQsBuwDLAm8HXgeuAjwL8l\n/d723T20ux1wqO19qZ7dHBERNbIoT18vZ/tdwDeoEs3ewMHAgcC9wP5USbeVKlltCewoaQXgg8A3\nbG8LXClpfB/97Fv+bQtMLcuaq8yZtncDPgscLmkMcBrwTttbAH+R9AbgLGAv2zsAT5Yx9eVC2+8o\nx/Q321uVcW9R1o+3vTvVychRtp8EfgSc0lNCbpIKOSKiphbJSrm4t3ydCjzU9Hpct+3+avt5AEnP\nUlWXRwBHSfpU2feyPvr5EFUluhLwm7KsucrsGsdjpe8VgH/Zfg7A9jcltQMrAz+TBFWFf10/x/dI\n+SrgqtLWo8B3y0zBH5r6XaqftuZLW1vbgmh2kdXe3jrSQ6iNxKIhsWhILIZuUU7KfVV8c6imm7vr\nSqYHA8fZniLp+8BewI+7byxpSWCfMt2LpAclXdzPOJ4F2iS92vZUSacCF1Alzz1sT5O0OzCtn+Ob\nU74+RFUdXyFpTeBE4Npu/bY07dPTcc+3yZP7G+bo0N7emlgUiUVDYtGQWDQM5eRkUZ6+7k0ncAdw\nHvAa5k5eXa/vAn4t6XqqCvjKnhqy/RLwT0l3luu8V9t+rKmdeU4MbHcCnwSuknRrWfY7quntqyTd\nDhwG3N/PMXQ5E1hT0s1U09Pf6mP73wOfLNeO+5Np7IiImmnp7Mx7c/SqM2e+lVQBDYlFQ2LRkFg0\ntLe3zveNtIvy9PWwkbQ51Q1jXWcoLeX1T22fuYD6fD3VHeLd+7zF9n8Pse1jgB17aPtA238fStsR\nEbHgpFKOXnV0dHS+9NLieIVj8FIFNCQWDYlFQ2LRMJRKOe+4ERERNZGkHBERURNJyhERETWRpBwR\nEVETScoRERE1kaQcERFRE0nKERERNZGkHBERURNJyhERETWRpBwREVETScoRERE1kaQcERFRE0nK\nERERNZFHNw6RpKWAD9k+p5f12wD/sn1/L+sPANa1fVQv648DnrJ91gDG0l9bBwDPAdOAQ23v21+b\nERGx8KRSHrqVgYP6WP9RYNV+2hjO52f22pbt82xfuQD6jIiIYZBKeeiOBtaTdAywBTAeGAMcA3QA\nuwKbSnoA2APYG1gGmALsNcA+9pb0AWBp4NO275b0ycG21VV1Ax744UVExMKSpDx0XwU2BFqBa22f\nJmkVYKLtNSVdDVwEPAG8xvbbAcryzQfYx19tf0LS+sCPgc2A185nWwPW1tY23E0u0trbW0d6CLWR\nWDQkFg2JxdAlKQ+f9YALAWw/KalDUnvXStudkl6WdBHwItWU9tgBtn1raeNBSSuVZS/NZ1uDMnny\ntAXR7CKnvb01sSgSi4bEoiGxaBjKyUmuKQ/dHKo4PgRsCyBpVWB5qpuq5gBjJG0I7FlurvoU1RR3\nywD72KK0uyHwjyG2FRERNZVKeeiepapS24C1Jb0PGAd83PYcSZOAk4F9gRck3UaVQJ8EVhlgH2tI\nugFYEjgYeHQ+28rNXR
ERNdbS2Zn36ehVZ6ajKpmaa0gsGhKLhsSiob29db5nLlMp14SkX1BNeXdp\nAabaHugd2sPeVkdHB7nCERGx8CQp14Tt99axrYiIWHhSBkVERNREknJERERNJClHRETURJJyRERE\nTSQpR0RE1ESSckRERE0kKUdERNREknJERERNJClHRETURJJyRERETSQpR0RE1ESSckRERE3kgRSL\nAEm7AG8APmp7y0Hu+3pgY9tXSjoFOAX4GPCU7bOGf7QRETG/UikvAmxfA1wLzM/Dr3cE3lbaOcL2\n48M5toiIGD6plBcBkg4Adu1nm+2AQ23vW75/ClgF+CKwtKQ7gCOAQxbwcCMiYj6lUl68NFfSnbY7\nga8BP7F9BfNXaUdExEKSSnnx1TLAZb1qa2sb1PaLu/b21pEeQm0kFg2JRUNiMXSplBcfM4GVASSt\nDrymLJ9Dfs4REYuEVMqLjv6mnu8GOiT9FngY+GtZ/ifgaEn3NLWRaeyIiBpq6ezM+3NEREQdpFJe\nxEg6hurPnLrOplrK6wNt/33EBhYREUOWSjkiIqImcgNQRERETWT6OuYhqQU4A9iY6q7ug2z/te+9\nFh+SlgB+CLwRWBL4KvAg8COqu9nvt/3JkRrfwiZpRaobCXcCZjNK4wAg6YvAe4CxVL8jtzIK41F+\nR86j+h2ZBXycUfZ/Q9JbgK/Z3kHSWvRw7JI+DhwMvAx81fav+2s3lXL0ZE9gKdtbAUdRfV72aPIh\nYIrtbak+Se10qhgcbXs74FWS9hjJAS4s5c33+8D0smhUxgFe+dS8LcvvxfZUn0c/WuOxGzDG9tuA\nE4GTGEWxkPR54AfAUmXRPMcuaSXgU8CWVO8jJ0sa21/bScrRk62BqwFsTwLePLLDWeh+BhxTXo+h\nqgTeZPu2suw3VFXjaPBN4HvAk1Q3FY7WOADsAtwv6TLgcuBKRm88HgGWKLNqbVSV4GiKxV+AvZq+\n36zbse8MbAFMtD3L9vPAn4GN+ms4STl6Mh7oaPp+lqRR83/F9nTbL0pqBX4OfIm5Pw1tGtUb0WJN\n0keAZ21fR+P4m/8fjIo4NFkB2Ax4H3AYcCGjNx4vAGtQfSbCmcB3GUW/I7Z/SXWy3qX7sY8HWpn7\nffQFBhCTUfNGG4PyPNV/qC6vsj1npAYzEsojL28EzrN9MdW1oi6twNQRGdjCdSCws6SbqO4vOB9o\nb1o/WuLQ5TngmlL5PEJ1v0Xzm+xoisfhwNW2ReP/xpJN60dTLKDn94fnqZJz9+V9SlKOntxOdc0I\nSW+l+lSwUaNcC7oG+ILt88rieyVtW16/E7itx50XI7a3s72D7R2APwAfBn4z2uLQZCLlaW2SVgGW\nBW4o15phdMXjnzSqwKlUNw3fO0pjAXBPD78XvwO2lrSkpDZgXeD+/hrK3dfRk19SVUi3l+8PHMnB\njICjgFcDx0g6lurDWT4DnFZu1HgIuGQExzeSPgf8YDTGwfavJW0j6S6q6crDgP8Dzh6F8fgO8ENJ\nt1Ldif5F4PeMzlhAD78XtjslfZfqZK6F6kawl/prKB8eEhERUROZvo6IiKiJJOWIiIiaSFKOiIio\niSTliIiImkhSjoiIqIkk5YiIiJrI3ylHjDKSVqf67OIHqP5+8lVUnzZ0vu3j+9nvZttr9LHN5sB7\nbX9R0u5Unwnca5sDGOtmwCG2D57fNgbZ3yvjXxj9RXSXpBwxOj1h+01d30haGfizpItsu4/9+vtg\ng/WBFQFsXwFcMZRB2v491aPvFpZXxh8xEpKUIwJglfJ1GoCk/wLeT1VFX9O9cpQ0geohBMtSJbFv\nAT8GTgCWlXQU1ZOltgcuBQ62vXvZ95PAOsARwP8A21E9jetHtk/t1s92wPHlmbU3AfdSPX1oHPDp\n8m994Nu2T5V0HPD/gLWA1wBn2f5meZrRd4C3U31O8QW2v1Ha/0Y5zieATZvGfzpwDrBq
ic+ttg8o\n+xxN9TjL9YA/AvvZniXpcOAQqocVXFlmDFakemjDaqXvo23fMPAfTYwmuaYcMTqtKukeSQ9JmkyV\nTPe0/aSkXaiehvRm4E3AapL267b/x4ATbb8F2BE4yXYHcCxwue2Ty3adVI+y27R8/i/AvsAFwMeB\nTttvBt4C7CnpbT2Mtbk677S9Udn/u1SPz9sWOK5pmw2AHcr4D5G0CXAosJrtCaWv90p6Z9l+HWAH\n23t2G/+7gHvLM4P/H7CVpE3LPlsCn6BKyqsDu5Sp70NLvxsDbyrbnwqcY3tzYA/gTEnL9nCcEUnK\nEaPUE7bfZHs9Gk/4uams24nqWbC/B+6hStAbdNv/SGBpSV8EvkpVMffI9iyqavm9kt4AvMb23aWf\n90i6F5hEVZFu2M+4f1O+/h240/a/bf+DuZ/WdJHtGeUZtr+iqo53BH5UxjOD6rGLb28M0S/0MO6L\ngeslfQY4jaryXq6svt/2U7Y7qT7r+DVUJwdX2H7B9mzb77DdVdmfUI7zN1SzAmv1c5wxSmX6OiK+\nQPUUqM8BX6dKGt+x/R0ASeOppmObH9v4c6pHGV4BXAx8oJ8+LgROpEpePynLxlA9ieuy0s9rqZ45\n25fmD/Sf1cs2zcvHAC8z9/NuKd93vf/N6KkRSZ8C9qaaer4OmNDUzsymTTvL8pe77b8y1RT3q4Ad\nbU9tWv50L2OPUS6VcsTo9EqSsj2bKiF/qVz/vBH4sKRlJS1BVW2+r9v+OwHHlpu5tgco121n0cPJ\nvu1JVNdlP0Q19Uzp52BJS0hajuppOm8Z6vEAe0kaK2l54N3AtVSzAAdIepWkZYD/oDEz0Kx5/DsB\nZ5aKuQXYhCrJ9+Y24J2Slilxu4hqluFG4JMAktanuga9zPwdZizukpQjRqe57qK2fQ3wW+Artq+k\nmm6eRJVA7rF9frf9jwdul3Q3sDPVIwzXAO4C3irppO59AD8FXrD9f+X771P9ada9Zb9zbN860DH3\nsW4GVYK/nepa98NU1e4TwH1U0/KX2f5VD+00j//bwPHlGE8v7fX052CdAGWq+nTgznJMN9u+kepm\ntLdKuo8qUf+H7Rf7OJYYxfLoxohYbJS7rzttnzDSY4mYH6mUIyIiaiKVckRERE2kUo6IiKiJJOWI\niIiaSFKOiIioiSTliIiImkhSjoiIqIkk5YiIiJr4/zQCTJd0nyuWAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x11b2b5bd0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# XGBoost\n",
    "clf2 = xgb.XGBClassifier(n_estimators=50, max_depth=1, \n",
    "                            learning_rate=0.01, subsample=0.8, colsample_bytree=0.3,scale_pos_weight=3.0, \n",
    "                             silent=True, nthread=-1, seed=0, missing=None,objective='binary:logistic', \n",
    "                             reg_alpha=1, reg_lambda=1, \n",
    "                             gamma=0, min_child_weight=1, \n",
    "                             max_delta_step=0,base_score=0.5)\n",
    "\n",
    "clf2.fit(x_train, y_train)\n",
    "print clf2.score(x_test, y_test)\n",
    "test_pd2 = pd.DataFrame()\n",
    "test_pd2['predict'] = clf2.predict(x_test)\n",
    "test_pd2['label'] = y_test\n",
    "print compute_ks(test_pd[['label','predict']])\n",
    "print clf2.feature_importances_\n",
    "# Top Ten\n",
    "feature_importance = clf2.feature_importances_\n",
    "feature_importance = 100.0 * (feature_importance / feature_importance.max())\n",
    "\n",
    "indices = np.argsort(feature_importance)[-10:]\n",
    "plt.barh(np.arange(10), feature_importance[indices],color='dodgerblue',alpha=.4)\n",
    "plt.yticks(np.arange(10 + 0.25), np.array(X.columns)[indices])\n",
    "_ = plt.xlabel('Relative importance'), plt.title('Top Ten Important Variables')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0148713087517\n",
      "0.0\n",
      "[ 0.02588781  0.10778862  0.00734994  0.02090219  0.02231172  0.00778016\n",
      "  0.00556834  0.01097013  0.00734689  0.0017027   0.00622544  0.01140843\n",
      "  0.00530896  0.00031185  0.01135318  0.          0.01488991  0.01840559\n",
      "  0.00585621  0.00652523  0.0066759   0.00727607  0.00955013  0.01004672\n",
      "  0.01785864  0.00855197  0.00985739  0.01477432  0.02184904  0.01816184\n",
      "  0.00878854  0.02078236  0.01310288  0.00844302  0.01596395  0.01825196\n",
      "  0.01817367  0.00297759  0.00084823  0.02808718  0.02917066  0.00897034\n",
      "  0.01139324  0.01532409  0.01467681  0.0032855   0.01066291  0.00581661\n",
      "  0.00955357  0.00417743  0.01333577  0.00489264  0.0128039   0.01340195\n",
      "  0.01286394  0.01619219  0.00395603  0.00508973  0.          0.00234757\n",
      "  0.00378329  0.00502684  0.01732834  0.01178674  0.00030035  0.01189509\n",
      "  0.00942532  0.00841645  0.01571355  0.00288054  0.          0.0011667\n",
      "  0.00106548  0.00488734  0.          0.00200132  0.00062765  0.04130873\n",
      "  0.10076558  0.00022293  0.00165858  0.00308408  0.0008255   0.        ]\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAc8AAAEZCAYAAADxH64ZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmYHGW5/vFvE5MMSXpGMA0EUBCBG0nYJSwimyCLBwUh\nIuhJRFlEVFYX+LGJC64oi4jIIhFPADkKRI8syhICJkE2WR8UXCJrRs1kkslAMunfH1Vjdyaz1Ww9\nPX1/rivXdKqr3nrrmZm++62q6TdXLBYxMzOz3lur0h0wMzOrNg5PMzOzjByeZmZmGTk8zczMMnJ4\nmpmZZeTwNDMzy+hNle6AWbWRdDGwZ/rfrYEXgFagCOwWEa/3s/1fAJsCOWA74AlgFbAoIt7Xn7bT\n9scCy4EJEdHS3/Yy7vtEoDUiru3j9msBdwGHlPddUgPwErB7RDzeYZs7gV9ExBUZ9nMncEJE/KWb\ndWYB90fE5R2WV6y+NnQcnmYZRcTJ7Y8lvQAcHRGPDmD7H0rbHgW8AewREc0D1X6qUn/gvSdwfz+2\nHwPs03FhRDRJ+hnwCaD8+7MZ8C7g0Cw7GYA3Kf4D+hHO4WnWP7n0339I2gv4JjCWJPzOjoi7JH0S\nOBwYDWwILASmR8RrGdvfE/g6UAesBM6NiDslnQDsRxIwmwEtwMci4k9dNS5JwC3AfGDndF9fAE4E\ntgIeiIjp6XqzgQXAFJKR8IkRMV/SGOBi4D1AG/AgcFpELJf0MnAPyQj6m8ABwLsltQK3Az8C1knr\n8TwwLSIWp9tdDrwP2Bj4aUScC1yTdv1BSftFRGPZ4fwQuFPSGRGxIl12bLpti6QNe9hfez/PSPfz\nXiDSY9sJqE+P+5iI+EPa/r6SZgDjgf8DvthJjT+V9iMHvAZ8NiL+LGlf4Btlq14QEb/q4ltlw4yv\neZoNIEkTgRtJgmUH4JPA/0jaOF1ld+D4iJhMcjr24oztF0gC4MMR8S5gGnCNpPXTVfYCjo2IbYDH\ngdN60yxJwEwG5gHfJgn5bYCDJG2frrc5cHNEbA9ckB4nwFeAfLr+9unjr5W1/4eImBwRM4E7gG9E\nxDXAR4HfRsQewDtIwuWosu1GR8R70mP6oqQNgGMonR4vD07S0f9zwGFprd4EfBy4LF2lp/09nPbz\nN5RGjnsADRGxW1qfm4HPl22zAcmbhp2AdwPTVyus9L60lrtHxE4kbwja63YB8NWImEryZmWNEbUN\nXw5Ps4G1G/BM+2nciHiSJJD2Sp+/vew62o9JRlZZ7AFMAn4l6VHgNpLR55T0+fkRsSh9/Aiwbi/a\nbI6I36WPnwfmRkRrer3ulbI2XoqIW9LjugVYW9LWwIHADyOiGBFFkrA6qKz9uZ3tNCK+DTwm6TSS\nUBEwoWyVW9P1/g78m2TE2G610XiZH5K8YQH4IPBk+8i7F/srP52cS7e5D/i6pBMlfTdts3yb6yLi\njfQ698+A/Tv05/0kI/h56ffrK8D6ktYGbgCuljST5Nr5uV0ckw1DDk+zgdXZ79QoklO1kARd+fK2\nHtrreO1sFPBoROwYETuko9vdgfvS55d32LarkCnX8QanFZ2utXrf2/uykjWPufx4AZZ21lh649WZ\nwMskoXdPh/52dSzdHdNNwLaS3kZyqrR91Nmb/a3RT0kfIjmt3Qb8L3BVh23Kv3851qzdKOCq9u8X\nsAMwNSKWR8RlJKeJ7wYOAf6YhqpVAYen2cD6PTBZ0o4AkrYhCbd70+f3LzvFejzJdcTudAyKB4Dt\nJO2Str8zyanKiRn7mevicXfell7PRdI0oDEiniM5FXuipLXSm5xOBO7soo2VlIL1fcB3I2IWsJjk\nGuOoHvqwkiRIR3f2ZES8AVwLnA5swer17cv+9ic5VX0l8BjJyLN8m6MlvUnSOJJTtv+XLm+v6R3A\nx9LT7QCntq8j6Q/AVhHxE5KfhfVIrq
taFXB4mvXPaiPD9OafI4ErJD0OXEdy085f01X+QXIN9GmS\nm1Z6uibZsf2XgQ8Dl0p6jOTU77SIeKUf/e7uztDy55YAn0qP62TgQ+ny84BlwB+Bp0hGsp/vZHtI\nguMLkk5Jt7tc0kPALJLR8+ZdbFcEiIg24FfAQ5I2p3M/Aj4FXJmeRm6XeX/AD4D3p6dc5wDPklwv\nbbeQ5A3TQ8CvI+LnHfo7m2T0e3dat0OAI9J1Tge+I+kRkj+/+UJEvNrFMdkwk/OUZGZDI73b9v3t\nf4pSTdK7bedGRKHHlc1qgEeeZtZbfqdtlvLI08zMLCOPPM3MzDJyeJqZmWXkj+cbAYrFYjGX6+1f\nG5iZWarPL5wOzxEgl8uxaNFAf254dSoU8q5FyrUocS1KXIuSQiHf52192tbMzCwjjzxHgKamJpYs\n8TtJgDFjVrkWKdeixLUocS1K+jPydHiOADMXNNPW+aeV1ZwJr8LSZa4FuBblXIsS1yLR2tLMl97R\n83pdcXiOAGuPz9OWG1vpbgwL4ybUuRYp16LEtShxLQaGr3mamZll5PA0MzPLyOFpZmaWkcPTzMws\nI4enmZlZRg5PMzOzjByeZmZmGTk8zczMMnJ4mpmZZeTwNDMzy8jhaWZmlpHD08zMLCOHZy9IOkDS\nsV08t46kowZgH4dK2qC/7ZiZ2eDzrCq9EBF3dPP0dsAHgFn93M3JwNPAK/1sx8zMBpnDsxckzQAO\nBDYBFgLvAOZHxEnAWcC2ko6NiKu62P5vJMH4NHANcBHJqH8icCKwLrA9MFPSHumyo4BVwA0Rcdkg\nHp6ZmWXk8MxmC2A/oBV4QdKXga8BJ3QVnKmNgO0iYrGkDwOnRcRT6eneYyLiBEmPAcen+/gw8G4g\nB9wl6Y6I+FNXjS9f1kwbrQNygNVuVPF1li9zLcC1KOdalLgWidaWZmD9Pm/v8MzmzxHRAiDpJaCu\nl9s1RsTi9PGLwLmSWoB6oKlsvRwwhWSE+7v0/28mCdQuw3P61HyWY6gBvf221ALXosS1KHEt+lsD\nh2c2xbLHufTrKmBUhu0uAY6OiJB0PklQlrcTwJMRcTCApFOAP3bXeENDA4sWNffqAEa6QiHvWqRc\nixLXosS1GBi+27b3il38/3lgiqTP9XLbnwI3S7qPZES5Ybr8QeA64O/A3ZLmSnoI2JxktGpmZsNE\nrljsmAlWbZqamoqNjX4nCTBxYh7XIuFalLgWJdVYi3y+nlwu1/OKGRUK+T436tO2A0TSIcBplEaZ\nufTxxRFx62Due+aCZtoYPZi7qBoTXoWly1wLcC3KuRYl1VaL1pZmpk2B+vqGSndlNQ7PARIRs4HZ\nldj32uPztOXGVmLXw864CXWuRcq1KHEtSqqzFisq3YE1+JqnmZlZRg5PMzOzjByeZmZmGTk8zczM\nMnJ4mpmZZeTwNDMzy8jhaWZmlpHD08zMLCOHp5mZWUYOTzMzs4wcnmZmZhk5PM3MzDJyeHYg6T2S\npvRhu79IGjMYfTIzs+HF4bmmTwAb9WE7T4xqZlYjqm5KMkkzgEOBPPAWYBZweETskj5/A/Bd4Gpg\nDrAt8CzwKrAn0AocDJwNvB1YD3gbcCrwT+BAYAdJT0XEP3qx/wsi4pck83ciaTJwEckbk4nAiREx\nT9JzwFxgK+CVtM9rBK6keuARYIuIKEr6BvCHiLi5P3UzM7OBU3XhmRoXEftJWg9YACyUtBVJQG4a\nEQ9JygPXp8H1DHBKRJwj6R5gctpOa0QcLGk/4LT08e3ArM6Cs4v9z5d0W9lzk9O2npJ0FHAMMA/Y\nDNg7Il6SNBfYOe37aiJiiaT7gQMk3QkcRBL0XVq+rJk2WnsoWW0YVXyd5ctcC3AtyrkWJdVWi9aW\nZqCu0t1YQ7WG530AEfGapH8DV5CE1N+B68vWezT9uhh4puxxXYfnF5Ltu9Nx/wVKp21fBM6V1ALU\nA0
3p8kUR8VIv93cV8DlgFHBXRKzsrjPTp+YzdL0WDL9ftMpxLUpci5JqqkUd9fX15HK5SndkNdUa\nnjsBSFqf5PTpL4AzgEZgWtl6PV2H7Oz5VSSh1dv91wOvkZ62BS4Bjo6IkHQ+sEkPba0hIh6QdDHJ\n9dduR50ADQ0NLFrUnHU3I1KhkHctUq5FiWtRUo21aGxcOijtFgp9H3hU6w1DkyT9FphNck1xOcn1\nzdciYnG6Tnkwdva4q2CdD1woSRn2v6qsveuBmyXdB2wBbNhDf7ryM2CDiHimxzXNzGxI5YrF6rpJ\nNL1hRxFxVofllwE3R8S9ldj/IOznDKAxIn7Si9WL1fZOcrBU47vqweJalLgWJa5FSaGQ7/O54Go9\nbbsaSXeQXFO8dwDb/AGwNaVRYi59fOMAtf9WYGYn7d8HbApMAg7pTVtNTU0sWeJfBoAxY1bVXC3y\n+eF3PchspKu6kaet6dK7/lFsY3SluzEsTBhfx9IqupOwv1pbmpk2pY76+oY1nvMIo8S1KHEtSmp+\n5Fnr1h6fpy03ttLdGBbGTairwVqsqHQHzGpOtd4wZGZmVjEOTzMzs4wcnmZmZhk5PM3MzDJyeJqZ\nmWXk8DQzM8vI4WlmZpaRw9PMzCwjh6eZmVlGDk8zM7OMHJ5mZmYZOTzNzMwycniWkfQeSVP6sN1f\nJI0ZjD6Zmdnw4/Bc3SeAjfqwned1MzOrIVU1JZmkGcChQB54CzALODwidkmfvwH4LnA1MAfYFngW\neBXYE2gFDgbOBt4OrAe8DTgV+CdwILCDpKci4h+92P8FEfFLkomskTQZuIjkTclE4MSImCfpOWAu\nsBXwStrnTgNX0j3AY8CUdD/TImJhH0tmZmaDoKrCMzUuIvaTtB6wAFgoaSuSgNw0Ih6SlAeuT4Pr\nGeCUiDgnDabJaTutEXGwpP2A09LHtwOzOgvOLvY/X9JtZc9NTtt6StJRwDHAPGAzYO+IeEnSXGDn\ntO9dmR8Rp0r6KnAU8K3uCrJ8WTNt1M4E0N0ZVXyd5TU2GTbUVbobZjWnGsPzPoCIeE3Sv4ErSELq\n78D1Zes9mn5dDDxT9riuw/MLyfbq03H/BUqnbV8EzpXUAtQDTenyRRHxUob9lfdt/Z46NH1qvve9\nrwm1FCZ11NfXk8vlOn22UPDPRjvXosS16L9qDM+dACStT3Ja8xfAGUAjMK1svZ6uQ3b2/CpgVIb9\n1wOvkZ62BS4Bjo6IkHQ+sEkPbWXpW5caGhpYtKi5j7saWQqFfM3VorFxaafLa7EWXXEtSlyLkv68\niajGG4YmSfotMJvkmuJykuubr0XE4nSd8vDp7HFX4TQfuFCSMux/VVl71wM3S7oP2ALYsIf+dMY3\nH5mZDXO5YrF6XqvTG3YUEWd1WH4ZcHNE3FuJ/VdaU1NTsbHR7yQBJk7MM5i1yOe7PkU63HiEUeJa\nlLgWJYVCvs+/zNV42nY1ku4guaZ47wC2+QNga0qjwFz6+MYBav+twMxO2r8vIr6ctb2ZC5ppY/RA\ndK3qTXgVli4bnFq0tjQzbQrU1zcMSvtmVj2qauRpnbvqwaZiW25spbsxLOQn1NG8dHDutl2+tIlD\nNltRNeHpEUaJa1HiWpT0Z+RZjdc8zczMKsrhaWZmlpHD08zMLCOHp5mZWUYOTzMzs4wcnmZmZhk5\nPM3MzDJyeJqZmWXk8DQzM8vI4WlmZpaRw9PMzCwjh6eZmVlGFQ9PSV+U9K4Baus8Scd3svzlgWi/\nl31YR9JR6eNrJb1vqPZtZmZDo+JTkkXEN4dgN0M5dcx2wAeAWUO4TzMzG0I9hmc6AfQhwNrABsAl\nwAeBycDngQnAKUAr8Cfg+Iho66KtTwPTgTbgoYg4RdK1JEEzCTgYGAdsBnwzImZ206/TgSOBFcCc\niDiz7Lm1gCtJ5uR8Aeh2vi5JfwOeTv99L922DlgOnAAcBqwTERdI
GgM8DmwTESs7ae4sYFtJx3a3\nz3S/2wOXAitJ6nccMAr4OfASsDFwe0Sc3VNbZmY2dHo78pwQEQdKOhI4JSJ2k7QXcDqwFbB9RLRI\nuogkbC7vop0ZwKcj4mFJJ0ga1eH5+og4SNLmwGySCaPXIGkKcASwa0SsknSzpPeXrXIYMDYidk8n\nnj68h+PbCNguIhZLugG4OCLukLQv8A3gJOAB4AKSUeXsLoIT4GvACRFxlaR397DfHwOfiIgnJH2A\nJLjPADYB9geagbmSto+Ix7pqZPmyZtoYnDksq82o4ussXzY4tWhtaSZ5T2Vmta634flo+nUx8EzZ\n43HAUxHRki6bQ/Ki35VPAGdI2hT4PdBxItL2gFhI96PFrYB5EbEq/f9ckpFw++nZLYEFABGxUNLC\nbtoCaIyIxenjbYCzJH0x7d+KiGiS9IikPYCPA6f10F5vTYqIJ9LHc4AL02N4PCKaACTNB0SpNmuY\nPjU/QN0ZKQYr4Oqor68nl+vz/LlDrlDwz0Y716LEtei/3oZnV9cMi8DWksalAboX8Fw37RxHMip7\nQ9LtwG7d7Ke7V6hngdPS07NFYE/gOmD79PmngKOASyVtSHL6szvl+30G+E5EzJOktG2Aq0hOT9dF\nRHfHuIre34j1kqRt0gDdm6R2OZKa1pGckt4FuKa7RhoaGjwzfKpQyA9qLRoblw5a2wNtsGtRTVyL\nEteipD9vIvp7t+0K4DzgHkkPAm8BftjN+k+QnIb8HfAKMJ/ug7lTEfEkcBPwIDAPeCEibi17/jbg\nn5J+T3Iq9LUejqN8X58Hzpd0L0kg/zFtcw7J6PbaHtp6HthG0ue6O4bUccBlkuYAnwVOTZe/QXLd\n8/fAL8tGp2ZmNgzkisWhvBHVeiJpE2BWROze222ampqKjY1+JwkwcWKegaxFPl9dp2nLeYRR4lqU\nuBYlhUK+z7/cA/6nKukNOjMpjbpy6eP7IuLLGds6Dji6k7bOjIj5Gds6hORaZce2Li4ftWZo73+B\ndcoW5YDFEXFYh/UGrB5dmbmgmTZGD0RTVW/Cq7B02cDUorWlmWlToL6+YUDaM7ORwyPPEeCqB5uK\nbblu/xqnZuQn1NG8dGDutl2+tIlDNltRteHpEUaJa1HiWpT0Z+RZ8U8YMjMzqzYOTzMzs4wcnmZm\nZhk5PM3MzDJyeJqZmWXk8DQzM8vI4WlmZpaRw9PMzCwjh6eZmVlGDk8zM7OMHJ5mZmYZOTzNzMwy\nqsrwlPRFSe8aoLbOk3R8J8tf7mG7m9Ov90jaciD60sV+Thqsts3MrG8GfEqyoRAR3xyC3XQ73UxE\nHDEEfQA4G/jBEO3LzMx6YVDCU9IM4BBgbWAD4BLgg8Bk4PPABOAUoBX4E3B8RLR10dangelAG/BQ\nRJwi6VpgFjAJOBgYB2wGfDMiZnbTr9OBI4EVwJyIOLPsubWAK4GtgReAbuf4kvRyREzqvhIg6XDg\nJJJaF4HDIuJfki4FpgKjgfMiYnaHZecDU4B1JV0WEZ/paV9mZjY0BnPkOSEiDpR0JHBKROwmaS/g\ndGArYPuIaJF0EXACcHkX7cwAPh0RD0s6QdKoDs/XR8RBkjYHZpNMPL0GSVOAI4BdI2KVpJslvb9s\nlcOAsRGxezqB9eE9HF9vJ0LdAjg4IlolXQEcIGk58JaI2EVSA3BaelyrLYuI8yR9pqfgXL6smTYG\nZg7Lajeq+DrLlw1MLVpbmoG6AWnLzEaWwQzPR9Ovi4Fnyh6PA56KiJZ02Rxg/27a+QRwhqRNgd8D\nHScvfSz9upDuR4tbAfMiYlX6/7kkI+H2ENwSWAAQEQslLeymrSwWAddJWgYIeBB4G8mxEBFNwHmS\nvthxWW93MH1qfoC6OlIMVODVUV9fTy7X5/lyK65Q8M9GO9eixLXov8EMz65GZkVga0nj0gDdC3iu\nm3aOA06IiDck3Q7s1s1+unuV
e5ZkhLdWus2ewHXA9unzTwFHAZdK2hDYuJu2ekVSPfBl4K1p3+5K\nvz4DTEvXaQBuJLmu+eHyZRFxIL24qauhocEzw6cKhfyA1qKxcemAtTXUBroW1cy1KHEtSvrzJqIS\nd9uuIBlV3SPpQeAtwA+7Wf8JYK6k3wGvAPPpPpg7FRFPAjeRjPzmAS9ExK1lz98G/FPS74HvAa/1\n8ni62+cSkhHuPOB+oAXYMN3XvyXdD/wGuCgiZndcljbzlKQur+OamdnQyxWLvb10Z8NY0e8kE35X\nXeJalLgWJa5FSaGQ7/M1mWHxpyrpDTozKY3icunj+yLiyxnbOg44upO2zoyI+RnbOgQ4rZO2Li4f\ntUraGfhWJ+vdGBE/yrLPvmhqamLJEv8yAIwZsypzLfL56r6uaWZDzyPPEeDSu/5RbGN0pbsxLEwY\nX8fSDHfbtrY0M21KHfX1DYPYq8rwCKPEtShxLUqqfuRp/bP2+DxtuW7/LLVmjJtQ14darBiUvpjZ\nyFWVH89nZmZWSQ5PMzOzjByeZmZmGTk8zczMMnJ4mpmZZeTwNDMzy8jhaWZmlpHD08zMLCOHp5mZ\nWUYOTzMzs4wcnmZmZhk5PM3MzDKqqfCUNEPShZXuR29JOlTSBpXuh5mZra6mwjNVTXOwnQzUV7oT\nZma2upqckkzSacBHSOaimhMRZ0raCPghMBaYBJwdEbdJehy4D9gWWAV8MCI6nQxP0mTgIpI3JROB\nEyNinqQ/AQ8AWwJ3Aw3AVODZiJgh6VrgdWBTYAPg48CGwPbATEl7RMTKro5n+bJm2uj9HJYj2aji\n6yzPOJ8n1A1eh8xsRKrF8NwS2AfYNSJWSbpZ0sFAK/CdiJgjaTfgfOA2kpHfzyLic5KuBw4Cbuqi\n7cnAaRHxlKSjgGOAeSShuDfwKvAvYOeICEnPS2ofWf41Ij4l6Vjg+Ij4tKTH0sddBifA9Kn5PpZi\npMoShnXU19eTy/V5TtxhrVDwz0Y716LEtei/WgzP7YHZEbEq/f9cktD7NXC2pE+my0eXbfNY+nUh\n3b8yvwicK6mFJHSb0uWNEfEigKSlERHp8qay9h4t28fuZW32+Kre0NDgmeFThUI+cy0aG5cOUm8q\nqy+1GKlcixLXoqQ/byJq8ZrnY8AukkZJygF7As8BXwGui4gZwD2sHlq9vU56CXBuRBwDPEHnwZfr\n4nFn+1hFbX6PzMyGtVp8YX6O5LTrAySnVF+IiFuBnwPflXQvsD/wlnT98lDrKUR/Ctws6T5gC5Lr\nlt21UexkWbkHSa55vrmH/ZqZ2RDKFYvVdPOpdaapqanY2OjTMAATJ+ZpbGwmnx+51zF7y6fnSlyL\nEteipFDI9/lFohavefaLpNHAnaw5WoyIOLECXWLmgmbaVrtEW7smvAqNi1qZNgXq6xsq3R0zG6Ec\nnhlFxAqSu3WHjbXH52nLja10N4aFcRPqqFvWSvJXSGZmg6MWr3mamZn1i8PTzMwsI4enmZlZRg5P\nMzOzjByeZmZmGTk8zczMMnJ4mpmZZeTwNDMzy8jhaWZmlpHD08zMLCOHp5mZWUYOTzMzs4z8wfDD\niKSxwLPAfwHrRsT9kv4HmB4RKyvbOzMza+fwHF7a55Y7AngFuD8ijq5gf8zMrBMOzwqTNB74GfBm\n4HlgI2AG8IakR4CbAEXEG5XrpZmZlXN4Vt6ngCci4hxJU4F9gZ8Ar0TEQ5I6Trq9huXLmmmjdZC7\nWR1GFV+ntaUZqKt0V8xsBHN4Vt6WwK8AImKBpI6zOOfW3GR106fmB6NfVaxAfX09uVyPpRvxCgX/\nbLRzLUpci/5zeFbe08DuwGxJOwCjgVVkuBO6oaGBRYuaB6l71aVQyLNoUTONjUsr3ZWKa6+FuRbl\nXIuS/ryJcHhW3hXATElzSO60bQUeBr4t6Vmgx9O2ZmY2tByeFRYRrwNHdvLUb9Kvm/XURlNTE0
uW\n+J0kwJgxqygWcz5la2aDyuE5Asxc0EwboyvdjWFh1AvNHLwZ1Nc3VLorZjaCOTxHgLXH52nLja10\nN4aFUcXXAY/CzWxw+eP5zMzMMnJ4mpmZZeTwNDMzy8jhaWZmlpHD08zMLCOHp5mZWUYOTzMzs4wc\nnmZmZhk5PM3MzDJyeJqZmWXk8DQzM8vI4WlmZpaRPxh+AEm6BzgBOAp4OSKuzLj9ocA8kjk8z4mI\nz0j6C6CIeGPAO2xmZn3i8BxY/Z24+mTg6Yh4DvjMALVpZmYDzOHZR5LywFVAA7AhcHmGba8FZkXE\nnZIOAD4C/BzYHpgp6b+BmRGxG+BZnc3MhhmHZ99tThKAt0iaBNwH/KOPbRUj4v8kPQYcD7xBhhHn\n8mXNtNHax12PLKNYUekumFkNcHj23avAKZI+RDL78ug+ttNxZJl5pDl9ar6Pux6J6qivryeX84Ad\noFDwz0Y716LEteg/h2ffnQ48GBE/krQ38P4M27YCk9LHO5YtX0Uf7oBuaGhg0aLmrJuNSIVC3rVI\nuRYlrkWJa1HSnzcR/lOVvpsNfCa9w/ZkYAUwJn2up1OuVwGnSbqT5HppuweBmcC6Zct8w5CZ2TCT\nKxb92jwCFP1OMuF31SWuRYlrUeJalBQK+T5f3/Fp20EiaTRwJ2uOHCMiThzIfTU1NbFkSW3/MuTz\nvs5pZkPH4TlIImIFsM9Q7Gvmgmba+ny/UvVrbWlm2hSor2+odFfMrEY4PEeAtcfnacuNrXQ3Ksx/\nomJmQ8c3DJmZmWXk8DQzM8vI4WlmZpaRw9PMzCwjh6eZmVlGDk8zM7OMHJ5mZmYZOTzNzMwycnia\nmZll5PA0MzPLyOFpZmaWkcPTzMwsI4fnAJO0l6RZ3Tx/gKRjh7JPZmY2sDyryuDocobxiLhjKDti\nZmYDrybCU1IeuApoADYELgeOBB4DpgB5YBrJSHwW8Hdgc2B+RJwk6Tzg5Yi4UpKAKyJiH0mHAyeR\n1LEIHNaLvswAtgKu6LCvBRHxaUkTgeuAN6ebTI+I57trc/myZtpo7XU9RprWlmagrtLdMLMaUhPh\nSRJOsyLiFkmTgPuAf5CE46mSvgocBdwIbAHsB7QCz0tar5P22keWWwIHR0SrpCuAA4CXetGf9u07\n29dZwK1pUO8KTAW6Dc/pU/O92OVIVkd9fT25XA6AQqHW61HiWpS4FiWuRf/VSni+Cpwi6UNAMzA6\nXf5o+nUhsH76+M8R0QIg6WXWHNLkyh6/BlwnaRkg4MGM/epsXwKuBoiIecC8nhppaGhg0aLmjLse\nWRoblwLQThi9AAAK4ElEQVTJi0Kt16Kda1HiWpS4FiX9eRNRKzcMnQ48GBHTgZ9TCsAur02m2tdr\nBSalj3cCkFQPfBn4CHBsuk6uYwMZtG/7NMloE0l7SrqwH22amdkgqJWR52zgUkkfARYDK4CxXaxb\n7OTxjcBNkvYCHgaIiCWS5pKMDFcC/yK5nvrXDP3qbF8XAtdI+hiwCvhkhvbMzGwI5IrFngZfNtw1\nNTUVGxtr+zRMPp9c8/QpqRLXosS1KHEtSgqFfJ/PFtbKyHPISfoBsDWlEWUufXxQRLw+kPuauaCZ\ntv9cxq09rS3NTJsC9fUNle6KmdUIh+cgiYiThmpfa4/P05br6ix0rVhR6Q6YWQ2plRuGzMzMBozD\n08zMLCOHp5mZWUYOTzMzs4wcnmZmZhk5PM3MzDJyeJqZmWXk8DQzM8vI4WlmZpaRw9PMzCwjh6eZ\nmVlGDk8zM7OMai48JY2V1OUcmZLeI2lKN8/PGOwJqiWdNFT7MjOz7GouPIFJwLHdPP8JYKMe2hjs\nSVDPHsJ9mZlZRrU4JdlZwDslnQNMBeqBUcA5QBNwILCDpKeADwIfAsYBjcBhPTUuaSJwHfDmdNEM\n4KPAyxFxpSQBV0TEPl1sfxawjqTLgIf6fJRmZjZoajE8vw
ZsA+SBOyPiUkkbAnMjYjNJtwOzgBeB\ndSPivQDp8p170f7ZwK1pUO7axTZdjiYj4uuSPhMRn5E0ozcHtHxZM2209mbVEam1pRmoq3Q3zKyG\n1GJ4tnsn8DOAiHhJUpOkQvuTEVGUtELSLGAZyanc0b1oV8DVaRvzgHmSzit7PjdQB9Dus/tvPOBt\nVpf1V/tfoZCvUD+GH9eixLUocS36rxbDcxXJtd5ngD2BxyVtBKwD/DN9fpSkbYBDI2JXSWsDD9O7\n4Hua5HTwE5L2BA4G/gVsmD6/Yy/aqPEwNDMb3moxPF8jGUE2AJtLOoLknN9xEbFK0nzgQuAoYKmk\n+0nC7CVKAdidC4FrJH2MJIjb7+y9KQ3Th3vRxtOSZgK/zXBcZmY2RHLFom/mNDMzy6IWR54DQtL/\nkpzqbZcDFkdEj3fkptufA+xL6eahXPr4mIj420D21czMBpZHnmZmZhnV4ockmJmZ9YtP21YxSTng\ncmA7oBU4NiJeqGyvho6kNwHXAJsCY0j+hvdp4CckN2s9GREnVap/Q03SesAfgP2ANmq0DgCSvgR8\ngOTmwMuBOdRgPdLfketIfkdWAsdRYz8bknYBvhER+0h6B50cu6TjgOOBFcDXIuLXPbXrkWd1OxQY\nGxG7A2cCF1W4P0PtY0BjROxJ8slQl5HU4KyI2AtYS9IHK9nBoZK+SF4BtKSLarIOAJL2AnZLfy/2\nBt5G7dbjYGBURLwb+ArwdWqoFpI+D/wYGJsuWuPYJa0PfBbYjeR15EJJPf5Nv8Ozuu0B3A4QEfOB\nd1W2O0PuJpKPVYTkIxZXAjtGxP3pst+QjMJqwXeAH5L8SVWO2q0DwAHAk5JuAW4DfkXt1uM54E3p\nWaoGkpFVLdXiz6z+sao7dTj2/Un+Ln9uRKyMiCXAn4Bte2rY4Vnd6kk+j7fdSkk18z2NiJaIWCYp\nD/wc+H+s/gETzSQvGCOapI8Dr0XEXZSOv/znoCbqUGYisBNwBHAiySeJ1Wo9lgJvB54FfgRcQg39\njkTEL0neVLfreOz1JB/VWv46upRe1KRmXmhHqCUk3/h2a0XEqkp1phIkvRW4G7guIm4guZbRLg8s\nrkjHhtYxwP6S7iG5/j0TKJQ9Xyt1aPdP4I50JPEcyf0A5S+GtVSPU4HbI0KUfjbGlD1fS7WAzl8f\nlpCEaMfl3XJ4VrcHSK5pkH4I/ROV7c7QSq9V3AF8ISKuSxc/mn6SE8BBwP2dbjyCRMReEbFPOlPP\nY8B/A7+ptTqUmUty7Yp00ofxwO/Sa6FQW/X4F6VR1WKSm0QfrdFaADzSye/FQ8AeksZIagC2Ap7s\nqSHfbVvdfkky4ngg/f8xlexMBZxJMvXbOZLOJfmQiZOBS9ML/s8AN1ewf5V0BvDjWqxDRPw6ndR+\nAclpuhOBvwJX1WA9vk/ycaFzSO48/hLJR4TWYi2gk9+LdBKQS0jedOVIbih6o6eG/CEJZmZmGfm0\nrZmZWUYOTzMzs4wcnmZmZhk5PM3MzDJyeJqZmWXk8DQzM8vIf+dpNkxJ2oTks0mfIvn7s7VIPv1k\nZkSc38N290bE27tZZ2fg8Ij4kqRDSD7zs8s2e9HXnYATIuL4vraRcX//6f9Q7M+sI4en2fD2YkTs\n2P4fSZOAP0maFRHRzXY9/QH31sB6ABExG5jdn05GxMMkUzoNlf/036wSHJ5m1WXD9GszgKQvAh8m\nGZXe0XEkJmkKyYeBjycJm+8CPwUuAMZLOpNkJpa9gV8Ax0fEIem2JwFbAKcB3wb2Ipm95icRcXGH\n/ewFnJ/OmXgP8CjJbB11wOfSf1sD34uIiyWdB2wJvANYF7gyIr6Tzv7xfeC9JJ9Den1EfCtt/1vp\ncb4I7FDW/8uAq4GN0vrMiYgZ6TZnkUzT9k7gj8DREbFS0qnACSQfGv6rdAS+HsmHp2+c7vusiPhd\n7781Vkt8zdNseNtI0i
OSnpG0iCT0Do2IlyQdQDJ7yLuAHYGNJR3dYftPAl+JiF2AfYGvR0QTcC5w\nW0RcmK5XJJmiaYf08z0BjgKuJ5lAuRgR7wJ2AQ6V9O5O+lo+2i1GxLbp9peQTAu1J3Be2TqTgX3S\n/p8gaXvgU8DGETEl3dfhkg5K198C2CciDu3Q//cDj6ZzVm4J7C5ph3Sb3YBPk4TnJsAB6SnfT6X7\n3Q7YMV3/YuDqiNgZ+CDwI0njOzlOM4en2TD3YkTsGBHvpDQjxj3pc/uRzEX4MPAISZBO7rD96cDa\nkr4EfI1kBNqpiFhJMvo8XNLbgHUj4g/pfj4g6VFgPskIb5se+v2b9OvfgHkR8XpE/J3VZzeZFRHL\n0zkUbyUZbe4L/CTtz3KS6cTeW+piLO2k3zcAv5V0MnApyUh2Qvr0kxHxckQUST7LdF2SEJ8dEUsj\noi0i3hcR7SPlC9Lj/A3JKPsdPRyn1SiftjWrHl8gmTXlDOCbJC/u34+I7wNIqic5DVk+HdnPSabo\nmg3cABzZwz5+BnyFJGT+J102imTmmlvS/byFZM7D7pR/sPbKLtYpXz6KZKLmXId1cpRep5Z31oik\nzwIfIjnlehcwpayd1rJVi+nyFR22n0RyanctYN+IWFy2/JUu+m41ziNPs+HtP2ESEW0kwfn/0utz\ndwP/LWm8pDeRjN6O6LD9fsC56U1BewOk1xVX0smb54iYT3Ld8GMkp1xJ93O8pDdJmkAy+8Qu/T0e\n4DBJoyWtA/wXcCfJqHqGpLUkjQM+SmmkXa68//sBP0pHoDlge5Iw7sr9wEGSxqV1m0Uyar8bOAlA\n0tYk10jH9e0wbaRzeJoNb6vdNRsRdwC/B74aEb8iOc06n+SF/pGImNlh+/OBByT9AdifZGqutwML\ngF0lfb3jPoAbgaUR8df0/1eQ/MnMo+l2V0fEnN72uZvnlpME8QMk12KfJRk9vgg8TnI6+paIuLWT\ndsr7/z3g/PQYL0vb6+zPdIoA6Snay4B56THdGxF3k9zUtKukx0kC9aMRsaybY7Ea5inJzGzIpXfb\nFiPigkr3xawvPPI0MzPLyCNPMzOzjDzyNDMzy8jhaWZmlpHD08zMLCOHp5mZWUYOTzMzs4wcnmZm\nZhn9f6qhvL/yXzRlAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1270e5590>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Random forest regressor (RFR): fit on the training split and print the test score.\n",
    "clf3 = RandomForestRegressor(n_jobs=-1, max_depth=10,random_state=0)\n",
    "clf3.fit(x_train, y_train)\n",
    "print clf3.score(x_test, y_test)\n",
    "\n",
    "# Put this model's predictions next to the true labels for the KS statistic.\n",
    "test_pd3 = pd.DataFrame()\n",
    "test_pd3['predict'] = clf3.predict(x_test)\n",
    "# np.ravel drops any pandas index so labels pair with predictions by position.\n",
    "# NOTE(review): assumes y_test is one-dimensional -- confirm against the split cell.\n",
    "test_pd3['label'] = np.ravel(y_test)\n",
    "# KS must be computed on this model's own frame (test_pd3), not on test_pd,\n",
    "# a name this cell never assigns.\n",
    "print compute_ks(test_pd3[['label','predict']])\n",
    "print clf3.feature_importances_\n",
    "\n",
    "# Top ten features, with importances rescaled so the largest equals 100.\n",
    "feature_importance = clf3.feature_importances_\n",
    "feature_importance = 100.0 * (feature_importance / feature_importance.max())\n",
    "\n",
    "indices = np.argsort(feature_importance)[-10:]\n",
    "# One position per selected feature, shared by the bars and the tick labels so the\n",
    "# two always have the same length; align='center' puts each bar on its tick.\n",
    "positions = np.arange(len(indices))\n",
    "plt.barh(positions, feature_importance[indices],color='dodgerblue',alpha=.4,align='center')\n",
    "plt.yticks(positions, np.array(X.columns)[indices])\n",
    "_ = plt.xlabel('Relative importance'), plt.title('Top Ten Important Variables')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.020808034579\n",
      "0.0\n",
      "[ 0.00950112  0.17496689  0.00476969  0.00538677  0.00898343  0.01604885\n",
      "  0.0139889   0.00605683  0.0042762   0.00358536  0.0144985   0.00915189\n",
      "  0.00643305  0.00637134  0.0050764   0.00218012  0.00925068  0.00363339\n",
      "  0.00988441  0.00645297  0.00662444  0.00934969  0.00739012  0.00635592\n",
      "  0.00633908  0.00923972  0.01263829  0.01190224  0.00914159  0.00402144\n",
      "  0.00917841  0.01456563  0.01161155  0.01097394  0.00506868  0.00772159\n",
      "  0.00560163  0.01132941  0.00172528  0.0085601   0.01282485  0.00970629\n",
      "  0.00956066  0.00731205  0.02087289  0.00430205  0.0062769   0.00765693\n",
      "  0.00922104  0.00296456  0.00563208  0.00459181  0.0133819   0.00548208\n",
      "  0.00450864  0.0132415   0.00677772  0.00509891  0.00108962  0.00578448\n",
      "  0.00934323  0.00715127  0.01078137  0.00855071  0.00695096  0.01488993\n",
      "  0.00317962  0.00485367  0.00476553  0.00509674  0.          0.00733654\n",
      "  0.00097223  0.00380448  0.00534715  0.00356893  0.0128526   0.11944538\n",
      "  0.11758343  0.00195945  0.00225379  0.00243429  0.0007562   0.        ]\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdEAAAEZCAYAAADIeB9CAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcXFWd/vFPEUISQnUr0mFTEFwelqAsEgSRTVBEGVmM\nI8gEQWSRcRTGQeHHoiCouMyAiKCAEmECDuOgoIAw7GgSNlFZvrgwGAlIGk3TSachdOr3xzltFU2v\nt6uru9PP+/XKq29X3XvOud9O11Pn3tt1S5VKBTMzMxu6NUZ7AGZmZuOVQ9TMzKwgh6iZmVlBDlEz\nM7OCHKJmZmYFOUTNzMwKWnO0B2A2Hkk6D9gtf7sV8EegE6gAO0fEC8Ns/0fA64ES8FbgN8AqYElE\nvHs4bef2pwArgHUiomO47Q2x7+OAzoj4XsHt1wBuBvavHbukZmAxsEtEPNRjm58DP4qIi4bQz8+B\nYyLiiX7WmQfcFREX9nh81OprjeUQNSsgIj7VvSzpj8ChEfFgHds/KLc9CXgR2DUi2uvVfjZafyS+\nG3DXMLZfC9iz54MR0SbpSuBIoPbnsznwNuCAoXRShzcr/iP8CcAhajZ8pfzv7yTtDnwFmEIKwVMj\n4mZJHwMOBiYDGwGLgDkR8ewQ298NOAeYCrwEnB4RP5d0DLA3KWg2BzqAwyLid301LknAtcACYMfc\n10nAccAWwD0RMSevdx2wEJhJmhkfFxELJK0FnAe8E+gCfgGcGBErJD0N3EaaUX8FeA/wDkmdwI3A\nxcCrcz3+AMyOiKV5uwuBdwOvBX4QEacDl+Wh/0LS3hHRWrM73wZ+LukzEbEyP3ZU3rZD0kYD9Nc9\nzs/kft4FRN63HYCmvN9HRMR9uf29JB0OTAd+Bny2lxofm8dRAp4FPhkRv5e0F/DlmlXPjIjr+/hR\n2Rjkc6JmdSZpPeBqUsBsB3wM+E9Jr82r7AIcHRFbkw7TnjfE9ltIQfChiHgbMBu4TNL6eZXdgaMi\nYhvgIeDEwTRLCpqtgfnAV0lhvw3wXknb5vXeCFwTEdsCZ+b9BDgLKOf1t83LZ9e0f19EbB0Rc4Gb\ngC9HxGXAR4BbImJX4A2kkDmkZrvJEfHOvE+flbQBcATVw+a1AUo+GvA4cGCu1ZrAR4EL8ioD9Xd/\nHucNVGeSuwLNEbFzrs81wL/VbLMB6c3DDsA7gDkvK6z07lzLXSJiB9Ibg+66nQl8MSJmkd60vGKG\nbWObQ9Ss/nYGHu0+vBsRvyUF0+75+RtrzrN9lzTTGopdgQ2B6yU9CPyENBudmZ9fEBFL8vIDwLqD\naLM9Iv43L/8BuDsiOvP5vGdq2lgcEdfm/boWmCZpK2Bf4NsRUYmICim03lvT/t29dRoRXwV+JelE\nUrgIWKdmlR/n9f4E/I00g+z2stl5jW+T3rgAfAD4bfdMfBD91R5mLuVt7gDOkXScpK/nNmu3uTwi\nXsznwa8E9ukxnveRZvTz88/rLGB9SdOAq4BLJc0lnVs/vY99sjHKIWpWf739Xk0iHcKFFHi1j3cN\n0F7Pc2uTgAcjYvuI2C7PdncB7sjPr+ixbV9hU6vnhVAre13r5WPvHstLvHKfa/cXYFlvjeULtE4G\nniaF3209xtvXvvS3Tz8E3iJpE9Ih1O5Z6GD6e8U4JR1EOtzdBfw3cEmPbWp/fiVeWbtJwCXdPy9g\nO2BWRKyIiAtIh49vBfYHfp3D1cYJh6hZ/f0S2FrS9gCStiGF3O35+X1qDr0eTTrP2J+egXEP8FZJ\nO+X2dyQdwlxviOMs9bHcn03y+V4kzQZaI+Jx0iHa4yStkS+GOg74eR9tvEQ1YN8NfD0i5gFLSecg\nJw0whpdIgTq5tycj4kXge8C/Am/i5fUt0t8+pEPY3wF+RZqJ1m5zqKQ1Ja1NOpT7s/x4d01vAg7L\nh+EBTuheR9J9wBYR8X3S/4UZpPOuNk
44RM2G72UzxXyR0D8CF0l6CLicdHHP/+VV/kw6R/oI6eKW\ngc5Z9mz/aeBDwDcl/Yp0SHh2RDwzjHH3dyVp7XPPA8fm/foUcFB+/AxgOfBr4GHSzPbfetkeUoCc\nJOnTebsLJd0LzCPNpt/Yx3YVgIjoAq4H7pX0Rnp3MXAs8J18eLnbkPsDvgW8Lx+KvRN4jHQ+tdsi\n0hune4GfRsR/9RjvdaTZ8K25bvsDH8zr/CvwNUkPkP5s56SI+Esf+2RjUMm3QjNrnHx17vu6/4Rl\nPMlX594dES0Drmw2QXgmamZD4XfdZjU8EzUzMyvIM1EzM7OCHKJmZmYF+WP/ViOVSqVSKg32LxXM\nzCwr/MLpEF2NlEolliyp92eUj08tLWXXInMtqlyLKteiqqWlXHhbH841MzMryDPR1UhbWxvPP+93\nlgBrrbXKtchciyrXosq1qBrOTNQhuhqZu7Cdrt4/CW3CWecvsGy5awGuRS3Xosq1SDo72vncGwZe\nry8O0dXItOllukpTRnsYY8La60x1LTLXosq1qHIt6sPnRM3MzApyiJqZmRXkEDUzMyvIIWpmZlaQ\nQ9TMzKwgh6iZmVlBDlEzM7OCHKJmZmYFOUTNzMwKcoiamZkV5BA1MzMryCFqZmZWkEN0CCS9R9JR\nfTz3akmH1KGPAyRtMNx2zMxs5PkuLkMQETf18/RbgX8A5g2zm08BjwDPDLMdMzMbYQ7RIZB0OLAv\nsCmwCHgDsCAijgdOAd4i6aiIuKSP7Z8kBeQjwGXAN0hHA9YDjgPWBbYF5kraNT92CLAKuCoiLhjB\n3TMzsyFyiBbzJmBvoBP4o6QvAGcDx/QVoNnGwFsjYqmkDwEnRsTD+TDwERFxjKRfAUfnPj4EvAMo\nATdLuikiftdX4yuWt9NFZ112cLybVHmBFctdC3AtarkWVa5F0tnRDqxfeHuHaDG/j4gOAEmLgamD\n3K41Ipbm5aeA0yV1AE1AW816JWAmacb7v/n7V5GCtc8QnTOrPJR9mAAG+2OZCFyLKteiyrUYbg0c\nosVUapZL+esqYNIQtjsfODQiQtLnSYFZ204Av42I/QAkfRr4dX+NNzc3s2RJ+6B2YHXX0lJ2LTLX\nosq1qHIt6sNX5w5dpY/v/wDMlPQvg9z2B8A1ku4gzTA3yo//Argc+BNwq6S7Jd0LvJE0ezUzszGi\nVKn0zAQbr9ra2iqtrX5nCbDeemVci8S1qGpkLcrlJkql0sArjhLPRKtaWsqFf1A+nFtnkvYHTqQ6\n6yzl5fMi4scj2ffche10MXkkuxg31vkLLFvuWoBrUatRtejsaGf2TGhqah7xvmx0OUTrLCKuA64b\njb6nTS/TVZoyGl2POWuvM9W1yFyLqsbWYmWD+rHR5HOiZmZmBTlEzczMCnKImpmZFeQQNTMzK8gh\namZmVpBD1MzMrCCHqJmZWUEOUTMzs4IcomZmZgU5RM3MzApyiJqZmRXkEDUzMyvIIdoLSe+UNLPA\ndk9IWmskxmRmZmOPQ7R3RwIbF9jON2c1M5tAxuWt0CQdDhwAlIHXAPOAgyNip/z8VcDXgUuBO4G3\nAI8BfwF2AzqB/YBTgc2AGcAmwAnAc8C+wHaSHo6IPw+i/zMj4n9I9w5F0tbAN0hvUtYDjouI+ZIe\nB+4GtgCeyWPuNXgl3Qb8CpiZ+5kdEYsKlszMzEbAuAzRbO2I2FvSDGAhsEjSFqSgfH1E3CupDFyR\nA+xR4NMRcVoOqK1zO50RsZ+kvYET8/KNwLzeArSP/hdI+knNc1vnth6WdAhwBDAf2BzYIyIWS7ob\n2DGPvS8LIuIESV8EDgHO7a8gK5a300Vnf6tMGJMqL7BiuWsBrkWtRtWis6MdmDri/djoG88hegdA\nRDwr6W/ARaSw+hNwRc16D+avS4FHa5an9nh+EUP7X9+z/xaqh3OfAk6X1AE0AW358SURsXgI/dWO\nbf
2BBjRnVnnwo58Q/CJW5VpUNaIWU2lqaqJUKjWgr+JaWvyaMVzjOUR3AJC0Pulw54+AzwCtwOya\n9QY6T9nb86uASUPovwl4lnw4FzgfODQiQtLngU0HaGsoY+tTc3MzS5a0F+xq9dLSUnYtMteiqpG1\naG1d1pB+ivL/i6rhvJkYzxcWbSjpFuA60jnHFaTzn89GxNK8Tm0I9bbcV0gtAL4kSUPof1VNe1cA\n10i6A3gTsNEA4+mNL1IyMxvjSpXK+Hutzhf2KCJO6fH4BcA1EXH7aPQ/BlT8zjLxu+wq16LKtahy\nLapaWsqFj7uP58O5LyPpJtI5x9vr2Oa3gK2ozgpLefnqOrX/OmBuL+3fERFfGGp7bW1tPP+8fykA\n1lprlWuRuRZVI12Lcnnsnwe1+hqXM1Hr3Tdv/nOli8mjPYwxYZ3pU1nmK1IB16LWSNais6Od2TOn\n0tTUPCLt15tnolWeiRoA06aX6SpNGe1hjAlrrzPVtchci6qRr8XKEWzbxqLxfGGRmZnZqHKImpmZ\nFeQQNTMzK8ghamZmVpBD1MzMrCCHqJmZWUEOUTMzs4IcomZmZgU5RM3MzApyiJqZmRXkEDUzMyvI\nIWpmZlaQQ7QPkt4paWaB7Z6QtNZIjMnMzMYWh2jfjgQ2LrCd7y1nZjZBjNtboUk6HDgAKAOvAeYB\nB0fETvn5q4CvA5cCdwJvAR4D/gLsBnQC+wGnApsBM4BNgBOA54B9ge0kPRwRfx5E/2dGxP+QbqyN\npK2Bb5DeqKwHHBcR8yU9DtwNbAE8k8f8iuCV1AQ8ALwpIiqSvgzcFxHXDKduZmZWP+M2RLO1I2Jv\nSTOAhcAiSVuQgvL1EXGvpDJwRQ6wR4FPR8Rpkm4Dts7tdEbEfpL2Bk7MyzcC83oL0D76XyDpJzXP\nbZ3beljSIcARwHxgc2CPiFgs6W5gxzz2l4mI5yXdBbxH0s+B95ICv08rlrfThW++DDCp8gIrfCNq\nwLWoNZK16OxoB6aOSNs2do33EL0DICKelfQ34CJSWP0JuKJmvQfz16XAozXLU3s8v4ih/Rb07L+F\n6uHcp4DTJXUATUBbfnxJRCweZH+XAP8CTAJujoiX+hvMnFnlIQx9IvALWpVrUTVStZhKU1MTpVJp\nhNqvv5YWv2YM13gP0R0AJK1POqz6I+AzQCswu2a9gc5T9vb8KlJ4Dbb/JuBZ8uFc4Hzg0IgISZ8H\nNh2grVeIiHsknUc6P9vvLBSgubmZJUvah9rNaqmlpexaZK5F1UjXorV12Yi1XW/+f1E1nDcT4/3C\nog0l3QJcRzrnuIJ0/vPZiFia16kNyN6W+wrYBcCXJGkI/a+qae8K4BpJdwBvAjYaYDx9uRLYICIe\nHXBNMzNrqFKlMj4vJs0X9igiTunx+AXANRFx+2j0PwL9fAZojYjvD7RuW1tbpbV17L6zLJcbd6jL\n77KrXIsq16LKtahqaSkXfmEa74dzX0bSTaRzjrfXsc1vAVtRnTWW8vLVdWr/dcDcXtq/A3g9sCGw\n/2DamruwnS4m12NYddfZ0c7smdDU1DzaQzEzq5txOxO1V7rkF22VrtKU0R5Gr1Ysa2P/zVc2LET9\nLrvKtahyLapci6rhzETH+zlRMzOzUeMQNTMzK8ghamZmVpBD1MzMrCCHqJmZWUEOUTMzs4IcomZm\nZgU5RM3MzApyiJqZmRXkEDUzMyvIIWpmZlaQQ9TMzKygYYeopAMkbSBpU0m/rMegerT/DUmvrXe7\njSRppqR39vP87pLmNXJMZmY2fPW4FdqngEeAFxjcTaaHJCJOrHebo+Bg4Bngrn7W8e10zMzGmQFD\nNN98en9gGrABcD7wAWBr4N+AbUn3w/wnYIakHwEbAb+OiKMlHQScBLwILI6ID/fT19nAHsAk4L8j\n4quSbgOOAQ4BNgNmAJsAJ0TEzZLeD5yem3ggIo6VtDvwReAl4A/A
MRHR1UeftwHPAq8G3g9cCLyR\nNEs/LSLuGGQfxwIfAfYD1gY2B74C3AJ8FHhB0v0RcV8fu/9mSTcArwEuiojLJO0E/DvpHqNPAR+J\niBf6qp+ZmTXWYA/nrhMR7wPOBY6NiIOAo4EjgAeBOaSQLJMCY2dgL0nrAR8Gzo2I3YDrJTX1088h\n+d9uwNL8WO0MrTMi9gM+DZwgaRLwTeC9ETEL+L2kTYDvAAdGxJ7A4jym/lwZEe8GjiTd1HsP4ADg\nWwX6aIqI/UlvNE6OiMXA94Fv9BOgkN7QvD/v+0m5dhcBH42InYGfAlv2txMrlrezYlnbmPzX2eH7\nFprZ6mewh3MfzF+XAo/WLE/tsd4fI+J5AEnPkmZkJwInS/pk3vbafvo5jDR7Wx+4IT9We7PU7nEs\nyn2vB/wtIp4DiIivSWoBNgR+KAnSDPrmAfbv8fx1G2DXPAMskWbEGwzQRymP5WbSjPRXNWMcyh2y\n5+fZcpekR4DXA+tHxOO53+8N1MCcWeUhdNdoU2lqaqJUKnzv2yFraRnL9Wgs16LKtahyLYZvsCHa\n3/m6VaSw6an71fJo4IyIaJV0EXAg8IOeK0taC5gdEYfk7x+RdNUA43gWaJb0qohYKuk84ApSgH0g\nItol7Q8MNA1alb8+BiyKiC9LmgqcQpplDraPTXuMsVTTfm81qrW9pDVIob8l8HtgsaQ3RMQfJJ0E\nRET8uK8Gmpubx/Sd6ltblzWsr5aW8piuRSO5FlWuRZVrUTWcNxPDvTq3AvwCuBxYl5cHSPfyQuCn\nkm4hzTCv762hiHgR+Kuk+fk85Y0RsaimnVcEeURUgOOBn0m6Mz92L+lw788k3QMcB/x2gH3odjGw\npaTbgXuAJ3MfnyjYR3fb9wPH5/OofVlBmn3fSnrTsZR0nvV7uR7bAj/rZ3szM2uwUqXii0JXF21t\nbZXW1tF5Z1kuN/ZQ7UD8LrvKtahyLapci6qWlnLhF696/InLkEjakXSBUnd6l/Ly1RFx8Qj1+TrS\nFcQ9+7wjIr4wEn32MY7TgL16GccREfHkcNufu7CdLiYPt5kh6+xoZ/ZMaGpqbnjfZmajqeEhmg+F\n7tngPhc1us8+xnEWcNZItT9tepmu0lCuZaqnlaPUr5nZ6PHH/pmZmRXkEDUzMyvIIWpmZlaQQ9TM\nzKwgh6iZmVlBDlEzM7OCHKJmZmYFOUTNzMwKcoiamZkV5BA1MzMryCFqZmZWkEPUzMysoAkXopKm\nSHqij+d2lzQvL19Txz7fJekXkm6X9MN8w28zMxvnJlyIUr39WF8qABHxwTr2eQHwDxGxB/B74Kg6\ntm1mZqOk4bdCGw2SpgNXAq8C/pAfmwmcn1d5DjiyxzZPR8SGkm4DfgXMBMrA7IhYJOkcYB/gKWAG\n8OGI+FMfQ9gjIlrz8ppAp6Szgaci4kJJrwJuAf4V+ArwAvAdYEvSLdwmAf8dEV8dZinMzKyOJkSI\nAscCv4mI0yTNIt0Y+zvAkRHxmKQjgc8CN9dsUztbXRARJ0j6InBIDtZ3RMSOkpqBx/rrPCL+AiDp\nIGAP4FRgY2AecCFwKHBFXn1KRLw9r//HvP4zwOED7eSK5e100TnQanXX2dEO+Ai1mU08EyVE3wxc\nDxARCyWtJM3yLpQEMBn4XT/bP5i/LgLWBzYD7svttUl6aKABSPo0cDDwnoh4EXhC0vOStgQ+AuwP\nbANEzWaHkWam6wM3DNTHnFnlgVYZIVNpamqiVCqNUv+9a2kZrXqMPa5FlWtR5VoM30QJ0UeAXYDr\nJG1HCs0A5kTEnyXtAmzQY5vaROh5DvU3wCcllYBpwFb9dS7p/wHbAXtHxAs1T30XOA1YFBF/zYG+\nKm+zFunQ8SH5+0ckXRURi/rqp7m5mSVL2vsbyohpbV02Kv32paWlPGq1GGtciyrXosq1qBrOm4mJ\nEqIXAXMl3UkKz07gOOAHktYk
BdfHSIdYu1V6fP27iHhU0o+A+cBfgBf76ljSDOB04H7gRkkV4OqI\nuBi4FvgW6XBuzz5elPRXSfOBDuDG/gLUzMwar1Sp9Hehqg2GpF8C/9jPhUV9bbc2cFtE7FSnoVT8\nzjLxu+wq16LKtahyLapaWsqFz0VNlJnoSKsAMyRdTnXm2v2nNN2zzpeRtDNwMXBGvQbR1tbG8883\n7peiXB5750HNzBrJIVoHEbFLXtxzCNv8EnhLPccxd2E7XUyuZ5N96uxoZ/ZMaGpqbkh/ZmZjkUN0\nNTJtepmu0pQG9riygX2ZmY09E/ETi8zMzOrCIWpmZlaQQ9TMzKwgh6iZmVlBDlEzM7OCHKJmZmYF\nOUTNzMwKcoiamZkV5BA1MzMryCFqZmZWkEPUzMysIIeomZlZQQ7RHiRNkfSxOrSzu6R5efma4Y/M\nzMzGGofoK20IHFWntioAEfHBOrVnZmZjiG+F9kqnAFtKOhWYBUwhBeupwG3AfOBDpICcB7wjIpb1\n16CkpyNiQ0nbAOflh58DjgS2Bz4LvAhsRrqJ9zmSDgJOyo8vjogPDzTwFcvb6aJzqPtbSGdHOzC1\nIX2ZmY1VDtFXOhuYCfwCuDMi7pS0M/CFiPiJpMOBS/K6hw0UoFklf/0OcEREPCbpSFJ43gxsAmwD\nTAMWA+cAHwbOjYgfSTpMUlNEPN9fJ3NmlYe2p8MylaamJkqlUgP7HJqWlkbWY2xzLapciyrXYvgc\non17Bji15vzomgARcZ+kpcALEfGbIba5JXChJIDJwO/y47+JiArQIakjP3YicLKkTwKPAtcO1Hhz\nczNLlrQPcUjFtbYO5v3D6GhpKTe0FmOZa1HlWlS5FlXDeTPhc6KvtAqYBJwFXB4Rh5MO45YAJH0Q\naAdeknTwINvsnq49BsyJiL1Is9Dr8+OVXtY9GjgjIvYk/ZwOLLY7ZmY2UjwTfaVnSbPErYCvSToZ\n+DOwnqRNgC8Au5Jqd6ekhRGxaIA2u0PyE8APJK1JCuuPARv3se5C4KeS2kmhfT1mZjamlCqVysBr\n2bjQ1tZWaW1t3OGZcnnsnhP1oaoq16LKtahyLapaWsqFX8g8Ex0mSacBe1GdQZby8hER8WQjxzJ3\nYTtdTG5IX50d7cyeCU1NzQ3pz8xsLHKIDlNEnEU6fzrqpk0v01Wa0sAeVzawLzOzsccXFpmZmRXk\nEDUzMyvIIWpmZlaQQ9TMzKwgh6iZmVlBDlEzM7OCHKJmZmYFOUTNzMwKcoiamZkV5BA1MzMryCFq\nZmZWkEPUzMysoAkbopKmSPpYP8+/U9LMfp4/XNKXRmZ0f+/j+Eb1ZWZmQzdhQxTYEDiqn+eP5JU3\nzO5ppG/GemoD+zIzsyGayLdCOwXYMt8PdBbQBEwCTgPagH2B7SQ9DHwAOAhYG2gFDhyocUnrAZcD\nr8oPHQ58BHg6Ir4jScBFEbFnH9ufArxa0gXAvYX30szMRsxEDtGzgW2AMvDziPimpI2AuyNic0k3\nAvOAp4B1I+JdAPnxHQfR/qnAj3Ngvr2PbfqcXUbEOZL+OSL+WdLhg9mhFcvb6aJzMKsOW2dHOzC1\nIX2ZmY1VEzlEu20JXAkQEYsltUlq6X4yIiqSVkqaBywnHeKdPIh2BVya25gPzJd0Rs3zpXrtQLc5\ns8r1brIfU2lqaqJUqvtu1E1LSyPrMba5FlWuRZVrMXwTOURXkc4JPwrsBjwkaWPg1cBz+flJkrYB\nDoiIt0uaBtzP4ALwEdJh4t9I2g3YD/grsFF+fvtBtDGkhGpubmbJkvahbDIsra3LGtbXULW0lBta\ni7HMtahyLapci6rhvJmYyCH6LGlG2Qy8UdIHSccnPx4RqyQtAL4EHAIsk3QXKdQWUw3C/nwJuEzS\nYaRA7r4S+Ic5VO8fRBuPSJoL3DKE/TIzswYpVSq+6HN10dbWVmltbdw7y3J57B7O9bvsKteiyr
Wo\nci2qWlrKhV/IJvJMtC4k/TfpEHC3ErA0Iga8gjdvfxqwF9WLjEp5+YiIeHIoY5m7sJ2uQZ2uHb7O\njnZmz4SmpuaG9GdmNhY5RIcpIg4e5vZnAWfVYyzTppfpKk2pR1ODtLKBfZmZjT0T+cMWzMzMhsUh\namZmVpBD1MzMrCCHqJmZWUEOUTMzs4IcomZmZgU5RM3MzApyiJqZmRXkEDUzMyvIIWpmZlaQQ9TM\nzKwgh6iZmVlBDtE6kbS7pHmjPQ4zM2sch2h9+easZmYTiG+FNgBJhwMHAGXgNaTbln0dUES8KOlL\nwKPAk8CbJd2Q17soIi7ro81NgeuAVuBnwI3A+fnp54AjI6Jd0jeBWcBk4IyIuG6EdtPMzApwiA7O\n2hGxt6QZwEL6nsGvCbyfFHoPSfpxRDzXx7rrA9tFRJekX5Juwv2YpCOBz0q6D3hNROwkqRk4kRS8\nfVqxvJ0uOgvs3tB1drQDUxvSl5nZWOUQHZw7ACLiWUl/A7aoea5Uszw/IrqALkmPAK8nzSx780Re\nF2BL4EJJkAL4d8CbgV/mftuAMwYa5JxZ5cHuTx1MpampiVKpNPCqo6SlpZH1GNtciyrXosq1GD6H\n6ODsACBpfaAJ+BOwkaQngW2BR/J620taA5hGCto/9NNm7fnTx4A5EfFnSbsAGwArgQ/lfpuBqyNi\n3/4G2dzczJIl7UPdt8JaW5c1rK+hamkpN7QWY5lrUeVaVLkWVcN5M+EQHZwNJd1CCtDjgI1I5zKf\nAP5as94K4AbgVaRzmEv7abM2RD8B/EDSmsAq4GMR8XtJ+0i6C5gEfL5eO2NmZvVRqlR8QWl/8oVF\niohTRnssg1DxO8vE77KrXIsq16LKtahqaSkXPi/lmegIkvRx4FCqs85SXj45IhbUu7+2tjaef74x\nvxTl8tg+H2pm1ggO0QFExOXD2Pa7wHfrOJx+zV3YTheTR7yfzo52Zs+EpqbmEe/LzGwsc4iuRqZN\nL9NVmtKg3lY2qB8zs7HLn1hkZmZWkEPUzMysIIeomZlZQQ5RMzOzghyiZmZmBTlEzczMCnKImpmZ\nFeQQNTMzK8ghamZmVpBD1MzMrCCHqJmZWUEOUTMzs4LGfIhKeo+ko+rQzu6S5hXY7vgBnm+RdK2k\n2yXdJWmzQbY7U9KuefkJSWsNdWxmZja6xnyIRsRNEXFJnZorcgfyUwd4/lzgiojYAzgN2GKQ7R4M\nbDWMcZmZ2Sgb87dCk3Q4sC+wKbAIeAOwMCI+IWkD4D9JIfQIMDMi9hxEm8cDBwFrA63AgcBmwPdI\n9/hag3RDr5vHAAAMVUlEQVQz7cOBdSVdEBH/3Edz7wAeknQz8ATwKUmbAlfn8XYvzwS2A64HLgQ+\nCrwg6UHSzbq/LWnzvC8HAjN6jicinupvv1Ysb6eLzoF2f9g6O9qBqSPej5nZWDfmQ7TGm4C9gU7g\nD5JmkGZ+V0bEpZIOAbYeZFuviYh3AUi6EdiRFHALgJOA3YDmiDhH0j/3E6AArwf+GhH7SDoN+Cwp\n/DbL451OCtcN89ifjIhTJX0feDoi7pUEcElE/FLSZcA+pBB92XiAfkN0zqzyIHd/uKbS1NREqVRq\nUH/FtLQ0qh5jn2tR5VpUuRbDN55C9PcR0QEg6WnSVGgz4KL8/G3AxwfZ1ov5/OhyYGNgMnApKQBv\nApYCp+R1B0qKVuC6vHwd8EVSiP4xIpZJWgk8ExFteeyr+mjngfz1L6QZcl/j6VNzczNLlrQPtFpd\ntLYua0g/RbW0lBtWi7HOtahyLapci6rhvJkY8+dEa9SeN+wOtoeAd+blHQfTiKRtgAMi4hDgk8Ck\n3N4HgLsiYm/gGlKA1fbVl7uB/fLybsDDvaxT6mV5FS+vf8/zon2Nx8zMxojxMhPtGTDd338F+L6k\ng4C/DbKt3wHLJN1FCrTFwEakQ6eXS3qRFG6fzus/LGluRM
zpo73PAJdI+gRpxngo8KoeY+5t+X7g\nXEmP9fH8fT3Gc8Ig98/MzBqkVKmsHheGKp1Y/HZE7DXaYxktbW1tldbWkT08Uy6P/XOh4ENVtVyL\nKteiyrWoamkpF35RGy8z0UGTdCrwLqozulJePiIinizY5uuAub20eUdEfGF4I66fuQvb6WLyiLXf\n2dHO7JnQ1NQ8Yn2YmY0nq02IRkQA3bPQL9a57UXAgH86M9qmTS/TVZoywr2sHOH2zczGj/F0YZGZ\nmdmY4hA1MzMryCFqZmZWkEPUzMysIIeomZlZQQ5RMzOzghyiZmZmBTlEzczMCnKImpmZFeQQNTMz\nK8ghamZmVpBD1MzMrKDV5gPoxxpJmwJXRcTOw2jjAGA+MGW4bZmZWf15Jjqyhnuz1k8BTXVqy8zM\n6mzCzkQlHQ7sB6wNbA6cC3wUOCYiHpd0DLA+cDlwNbAI2DQvzwS2BX4WEf+vn25mSLo2t/PTiPii\npDcClwCTgQ7gwxHxXC/j2y/3MRf4p+HvsZmZ1duEDdGsKSLem4PtOuDpPtbbDNgbmA48AWwIdAJP\nAv2F6HTgMFJY3inpJ8CZwNkRcbOk9wPbAbf03DAifibpQeAY4MXB7MyK5e100TmYVQvp7GgHpo5Y\n+2Zm481ED9Ff5a+LeGU6lGqW/xgRyyStBJ6JiDYASasGaP+hiFiW170XeHP+Nx8gIq4fYPtSj3H0\n65P7vHbQ6xaz/sg2X2ctLeXRHsKY4VpUuRZVrsXwTfRzoj3PM3YCG+Xl7fvYptTHcm+2krS2pDWB\nWcBvgUfzMpIOlXR8P9uvovozGuGANDOzoZroIVqrApwPXCjpBl5em8oglnvzHOkc6t3Af0XEY8BJ\nwMmSbgUOBa7sZ/tfkM6JrjuIvszMrMFKlYpfm83MzIqY6OdEh03Sx0kzyu53I6W8fHJELBjE9vsD\nJ/ay/XkR8eP6j9jMzOrFM1EzM7OCfE7UzMysIB/OHecklYALgbeSri4+KiL+OLqjapx85fNlwOuB\ntYCzgUeA75Oubv5tRPR3BfRqR9IM4D7S3zZ3MUFrIelzwD+QPtjkQuBOJmAt8u/I5aTfkZeAjzMB\n/19I2gn4ckTsKekN9LL/+fTc0cBK0t/z/3Sgdj0THf8OAKZExC7AycA3Rnk8jXYY0BoRuwH7AheQ\nanBKROwOrCHpA6M5wEbKL5gXkT7gAyZoLSTtDuycfy/2ADZhgtaC9MlskyLiHcBZwDlMsFpI+jfg\nu6TPIYde9l/S+sAngZ1JryVfkjR5oLYdouPfrsCNAPlCpreN7nAa7ofAaXl5Eumd9vYRcVd+7AbS\njGyi+BrwbWAx6SK1iVqL9wC/zR+7+RPgeiZuLR4H1sxHrZpJs6yJVovfAwfWfL9Dj/3fh/T3+3dH\nxEsR8TzwO+AtAzXsEB3/moC2mu9fkjRhfq4R0RERyyWVgf8ifQxj7QdTtJNeOFZ7kj4KPBsRN1Ot\nQe3/hQlTC2A9YAfgg8BxpL/Hnqi1WEb66NLHgItJfw8/oX5HIuJ/SG+wu/Xc/yagzMtfS5cxiLpM\nmBfb1djzpB9+tzUiYqCPI1ytSHodcCtweURcRTrP0a0MLB2VgTXeEcA+km4jnSOfC7TUPD+RavEc\ncFOeVTxOul6g9gVxItXiBODGiBDV/xdr1Tw/kWrRrbfXiOep3jWr9vF+OUTHv3tI5zyQ9HbgN6M7\nnMbK5zFuAk6KiMvzww9K2i0vvxe4q9eNVzMRsXtE7BkRe5I+F/qfgBsmYi1InxK2L4CkjUg3g/jf\nfK4UJlYt/kp1hrWUdEHpgxO0Ft0e6OX34l5gV0lrSWoGtiB9VGu/fHXu+Pc/pNnHPfn7I0ZzMKPg\nZOBVwGmSTid9UMWngG/miwIeBa4ZxfGNts8A351otYiIn0p6p6SFpEN3xwH/B1wy0WoB/AdwmaQ7\nSVcqfw64n4lZi26v+L
2IiIqk80lvwEqkC48GvIOWP2zBzMysIB/ONTMzK8ghamZmVpBD1MzMrCCH\nqJmZWUEOUTMzs4IcomZmZgX570TNxjhJm5I+//Rh0t+vrUH6NJW5EfH5Aba7PSI262edHYGDI+Jz\n+QbxO/TX5iDGugNwTEQcXbSNIfb39/E3oj+znhyiZuPDUxGxffc3kjYEfidpXkREP9sN9IfgWwEz\nACLiOuC64QwyIu4n3UqqUf4+frPR4BA1G582yl/bASR9FvgQaZZ6U8+ZmaSZpA8en04Kna8DPwDO\nBKZLOpl055c9gB8BR0fE/nnb44E3AScCXwV2J90x5/sRcV6PfnYHPp/v2Xgb8CDpDiFTgX/J/7YC\n/j0izpN0BvBm4A3AusB3IuJr+Y4j/wG8i/Q5p1dExLm5/XPzfj4FbFcz/guAS4GNc33ujIjD8zan\nkG4PtyXwa+DQiHhJ0gnAMaQPJ78+z8hnkD6o/bW571Mi4n8H/6OxicTnRM3Gh40lPSDpUUlLSOF3\nQEQslvQe0h1L3gZsD7xW0qE9tv8YcFZE7ATsBZwTEW3A6cBPIuJLeb0K6dZQ2+XPDwU4BLiCdDPn\nSkS8DdgJOEDSO3oZa+3stxIRb8nbn0+6HdVuwBk162wN7JnHf4ykbYFjgddGxMzc18GS3pvXfxOw\nZ0Qc0GP87wMezPfNfDOwi6Tt8jY7A58gheimwHvyoeBjc79vBbbP658HXBoROwIfAC6WNL2X/TRz\niJqNE09FxPYRsSXVu3Dclp/bm3QvxPuBB0iBunWP7f8VmCbpc8DZpBlpryLiJdJs9GBJmwDrRsR9\nuZ9/kPQgsIA049tmgHHfkL8+CcyPiBci4k+8/I4q8yJiRb6H449Js8+9gO/n8awg3crsXdUhxrJe\nxn0VcIukTwHfJM1s18lP/zYino6ICumzUtclhfl1EbEsIroi4t0R0T1zPjPv5w2kWfcbBthPm6B8\nONds/DmJdJeWzwBfIb3I/0dE/AeApCbS4cna26D9F+n2YNcBVwH/OEAfVwJnkcLmP/Njk0h3y7k2\n9/Ma0j0X+1P7Ad4v9bFO7eOTSDeNLvVYp0T19WpFb41I+iRwEOlQ7M3AzJp2OmtWreTHV/bYfkPS\nId81gL0iYmnN48/0MXab4DwTNRsf/h4qEdFFCtD/l8/f3Qr8k6TpktYkzeY+2GP7vYHT88VDewDk\n844v0cub6YhYQDqveBjpUCy5n6MlrSlpHdLdLnYa7v4AB0qaLOnVwPuBn5Nm2YdLWkPS2sBHqM68\na9WOf2/g4jwjLQHbkkK5L3cB75W0dq7bPNIs/lbgeABJW5HOoa5dbDdtdecQNRsfXnaVbUTcBPwS\n+GJEXE86/LqA9IL/QETM7bH954F7JN0H7EO6LdhmwELg7ZLO6dkHcDWwLCL+L39/EelPbR7M210a\nEXcOdsz9PLeCFMj3kM7VPkaaTT4FPEQ6TH1tRPy4l3Zqx//vwOfzPl6Q2+vtz3sqAPnQ7QXA/LxP\nt0fEraSLn94u6SFSsH4kIpb3sy82gflWaGY2avLVuZWIOHO0x2JWhGeiZmZmBXkmamZmVpBnomZm\nZgU5RM3MzApyiJqZmRXkEDUzMyvIIWpmZlaQQ9TMzKyg/w8hycl3QEtk6QAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x119855e10>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Extra-trees regressor (XTR): fit on the training split and print the test score.\n",
    "clf4 = ExtraTreesRegressor(n_jobs=-1, max_depth=10,random_state=0)\n",
    "clf4.fit(x_train, y_train)\n",
    "print clf4.score(x_test, y_test)\n",
    "\n",
    "# Put this model's predictions next to the true labels for the KS statistic.\n",
    "test_pd4 = pd.DataFrame()\n",
    "test_pd4['predict'] = clf4.predict(x_test)\n",
    "# np.ravel drops any pandas index so labels pair with predictions by position.\n",
    "# NOTE(review): assumes y_test is one-dimensional -- confirm against the split cell.\n",
    "test_pd4['label'] = np.ravel(y_test)\n",
    "# KS must be computed on this model's own frame (test_pd4), not on test_pd,\n",
    "# a name this cell never assigns.\n",
    "print compute_ks(test_pd4[['label','predict']])\n",
    "print clf4.feature_importances_\n",
    "\n",
    "# Top ten features, with importances rescaled so the largest equals 100.\n",
    "feature_importance = clf4.feature_importances_\n",
    "feature_importance = 100.0 * (feature_importance / feature_importance.max())\n",
    "\n",
    "indices = np.argsort(feature_importance)[-10:]\n",
    "# One position per selected feature, shared by the bars and the tick labels so the\n",
    "# two always have the same length; align='center' puts each bar on its tick.\n",
    "positions = np.arange(len(indices))\n",
    "plt.barh(positions, feature_importance[indices],color='dodgerblue',alpha=.4,align='center')\n",
    "plt.yticks(positions, np.array(X.columns)[indices])\n",
    "_ = plt.xlabel('Relative importance'), plt.title('Top Ten Important Variables')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 作业：stacking"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
